From 204f0bec253938af7e144ab249dfeae45e238a06 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 20:56:13 +0000 Subject: [PATCH] ci: auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ...1.0-mb-data-preprocessing-mem-reduce.ipynb | 22 +- ....0a-mb-data-preprocessing-supervised.ipynb | 62 +- ...b-mb-data-preprocessing-unsupervised.ipynb | 73 +- .../3.0a-mb-explanatory-data-analysis.ipynb | 313 +++-- ....0b-mb-explanatory-matched-unmatched.ipynb | 51 +- notebooks/3.0c-feature-engineering.ipynb | 147 ++- notebooks/3.0d-mb-adv_val.ipynb | 26 +- notebooks/4.0a-mb-logistic-regression.ipynb | 65 +- notebooks/4.0b-mb-fttransformer.ipynb | 57 +- notebooks/4.0c-mb-feature-importances.ipynb | 479 ++++--- .../4.0e-mb-fttransformer-pretraining.ipynb | 124 +- notebooks/5.0a-mb-batch-size-finder.ipynb | 9 +- notebooks/6.0a-mb-results-fttransformer.ipynb | 51 +- .../6.0b-mb-results-classical-rules.ipynb | 40 +- notebooks/6.0c-mb-results-universal.ipynb | 233 ++-- .../6.0d-mb-results-gradient-boosting.ipynb | 17 +- notebooks/6.0e-mb-viz-universal.ipynb | 1158 ++++++++++++----- notebooks/6.0f-mb-viz-gradient-boosting.ipynb | 127 +- notebooks/6.0g-mb-viz-fttransformer.ipynb | 41 +- notebooks/6.0h-mb-viz-embeddings.ipynb | 79 +- notebooks/6.0i-mb-discussion.ipynb | 163 ++- src/otc/models/fttransformer.py | 6 +- 22 files changed, 2016 insertions(+), 1327 deletions(-) diff --git a/notebooks/1.0-mb-data-preprocessing-mem-reduce.ipynb b/notebooks/1.0-mb-data-preprocessing-mem-reduce.ipynb index 9291c322..385295a0 100644 --- a/notebooks/1.0-mb-data-preprocessing-mem-reduce.ipynb +++ b/notebooks/1.0-mb-data-preprocessing-mem-reduce.ipynb @@ -31,7 +31,7 @@ "ProgressBar.enable()\n", "\n", "import wandb\n", - "from tqdm.auto import tqdm\n" + "from tqdm.auto import tqdm" ] }, { @@ -47,7 +47,7 @@ "FILE_PATH_INPUT = (\n", " \"gs://thesis-bucket-option-trade-classification/data/raw/matched_cboe_quotes.csv\"\n", ")\n", - "FILE_PATH_OUTPUT = \"gs://thesis-bucket-option-trade-classification/data/preprocessed/\"\n" + "FILE_PATH_OUTPUT = \"gs://thesis-bucket-option-trade-classification/data/preprocessed/\"" ] }, { @@ -58,7 +58,7 @@ "source": [ "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"\n", "credentials, _ = google.auth.default()\n", - "fs = gcsfs.GCSFileSystem(project=\"thesis\", token=credentials)\n" + "fs = gcsfs.GCSFileSystem(project=\"thesis\", token=credentials)" ] }, { @@ -76,7 +76,7 @@ "source": [ "# connect to weights and biases\n", "run = wandb.init(project=\"thesis\", job_type=\"dataset-creation\", entity=\"fbv\")\n", - "dataset = wandb.Artifact(name=f\"{EXCHANGE}_{STRATEGY}_csv\", type=\"raw_data\")\n" + "dataset = wandb.Artifact(name=f\"{EXCHANGE}_{STRATEGY}_csv\", type=\"raw_data\")" ] }, { @@ -88,8 +88,7 @@ "outputs": [], "source": [ "def import_data(input_file: str) -> pd.DataFrame:\n", - " \"\"\"\n", - " create a dataframe and optimize its memory usage.\n", + " \"\"\"Create a dataframe and optimize its memory usage.\n", "\n", " I.e., apply some optimizations i.e, manual inference of dtypes, pre-selection\n", " of unique columns and chunking to enable import.\n", @@ -189,7 +188,7 @@ "\n", " format = \"%d%b%y:%H:%M:%S\"\n", " df[\"QUOTE_DATETIME\"] = pd.to_datetime(df[\"QUOTE_DATETIME\"], format=format)\n", - " return df\n" + " return df" ] }, { @@ -203,8 +202,7 @@ "def df_to_parquet(\n", " x: pd.DataFrame, target_dir: str, chunk_size: int = 1000000, **parquet_wargs\n", ") -> 
None:\n", - " \"\"\"\n", - " Write pd.DataFrame to parquet format.\n", + " \"\"\"Write pd.DataFrame to parquet format.\n", "\n", " Args:\n", " x (pd.DataFrame): input dataframe.\n", @@ -222,7 +220,7 @@ " slc.to_parquet(output_path, **parquet_wargs)\n", "\n", " # log in w & b\n", - " dataset.add_reference(output_path, name=f\"raw_parquet_{chunk:04d}\")\n" + " dataset.add_reference(output_path, name=f\"raw_parquet_{chunk:04d}\")" ] }, { @@ -805,7 +803,7 @@ "client = Client()\n", "\n", "df = import_data(FILE_PATH_INPUT)\n", - "df_to_parquet(df, FILE_PATH_OUTPUT)\n" + "df_to_parquet(df, FILE_PATH_OUTPUT)" ] }, { @@ -833,7 +831,7 @@ "source": [ "# Log the artifact to save it as an output of this run\n", "run.log_artifact(dataset)\n", - "wandb.finish()\n" + "wandb.finish()" ] } ], diff --git a/notebooks/2.0a-mb-data-preprocessing-supervised.ipynb b/notebooks/2.0a-mb-data-preprocessing-supervised.ipynb index 0716ca5d..5232b639 100644 --- a/notebooks/2.0a-mb-data-preprocessing-supervised.ipynb +++ b/notebooks/2.0a-mb-data-preprocessing-supervised.ipynb @@ -21,7 +21,7 @@ "from pandas._testing.asserters import assert_almost_equal\n", "from tqdm.auto import tqdm\n", "\n", - "sys.path.append(\"..\")\n" + "sys.path.append(\"..\")" ] }, { @@ -34,7 +34,7 @@ "source": [ "EXCHANGE = \"cboe\" # \"ise\"\n", "STRATEGY = \"transfer\" # \"supervised\"\n", - "max_i = 50 if EXCHANGE == \"ise\" else 38 # number of partial files\n" + "max_i = 50 if EXCHANGE == \"ise\" else 38 # number of partial files" ] }, { @@ -53,7 +53,7 @@ "source": [ "# connect to weights and biases\n", "run = wandb.init(project=\"thesis\", job_type=\"dataset-creation\", entity=\"fbv\")\n", - "dataset = wandb.Artifact(name=f\"{EXCHANGE}_{STRATEGY}_raw\", type=\"preprocessed_data\")\n" + "dataset = wandb.Artifact(name=f\"{EXCHANGE}_{STRATEGY}_raw\", type=\"preprocessed_data\")" ] }, { @@ -67,7 +67,7 @@ "source": [ "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"\n", "\n", - "fs = gcsfs.GCSFileSystem(project=\"thesis\")\n" + "fs = gcsfs.GCSFileSystem(project=\"thesis\")" ] }, { @@ -99,7 +99,7 @@ "source": [ "files = [\n", " f\"gs://thesis-bucket-option-trade-classification/data/preprocessed/{'unmatched' if STRATEGY == 'unsupervised' else 'matched'}_{EXCHANGE}_quotes_min_mem_usage_extended_part_{i:04d}.parquet\"\n", - " for i in range(0, max_i)\n", + " for i in range(max_i)\n", "]\n", "\n", "columns = [\n", @@ -130,7 +130,7 @@ "dfs = [pd.read_parquet(gc_file, columns=columns) for gc_file in tqdm(files)]\n", "df = pd.concat(dfs)\n", "\n", - "del dfs\n" + "del dfs" ] }, { @@ -146,7 +146,7 @@ }, "outputs": [], "source": [ - "df.memory_usage(deep=True).sum()\n" + "df.memory_usage(deep=True).sum()" ] }, { @@ -157,7 +157,7 @@ }, "outputs": [], "source": [ - "len(df)\n" + "len(df)" ] }, { @@ -207,7 +207,7 @@ " assert_almost_equal(\n", " stats_trade_size.values.tolist(), [18.14, 5.0, 223.24], atol=0.1\n", " )\n", - " assert_almost_equal(stats_buy_trades, 0.4500, atol=0.01)\n" + " assert_almost_equal(stats_buy_trades, 0.4500, atol=0.01)" ] }, { @@ -227,7 +227,7 @@ }, "outputs": [], "source": [ - "df.sort_values(by=\"QUOTE_DATETIME\", inplace=True)\n" + "df.sort_values(by=\"QUOTE_DATETIME\", inplace=True)" ] }, { @@ -263,7 +263,7 @@ "\n", "if EXCHANGE == \"cboe\" and STRATEGY == \"transfer\":\n", " # use everything after *ISE* validation set for transfer learning\n", - " test_range = df.QUOTE_DATETIME.between(\"2015-11-06 00:00:01\", \"2017-10-31 23:59:00\")\n" + " test_range = df.QUOTE_DATETIME.between(\"2015-11-06 00:00:01\", \"2017-10-31 
23:59:00\")" ] }, { @@ -280,7 +280,6 @@ "outputs": [], "source": [ "if STRATEGY == \"supervised\":\n", - "\n", " train = df[train_range]\n", "\n", " len_train = len(train)\n", @@ -317,7 +316,7 @@ "\n", " output_path = f\"gs://thesis-bucket-option-trade-classification/data/preprocessed/{EXCHANGE}_{STRATEGY}_test.parquet\"\n", " test.to_parquet(output_path)\n", - " dataset.add_reference(output_path, name=\"test_set\")\n" + " dataset.add_reference(output_path, name=\"test_set\")" ] }, { @@ -347,7 +346,7 @@ "# Log the artifact to save it as an output of this run\n", "run.log_artifact(dataset)\n", "\n", - "wandb.finish()\n" + "wandb.finish()" ] }, { @@ -370,7 +369,7 @@ "val = pd.read_parquet(\n", " \"gs://thesis-bucket-option-trade-classification/data/ise_log_standardized/val_set.parquet\",\n", " engine=\"fastparquet\",\n", - ")\n" + ")" ] }, { @@ -382,7 +381,7 @@ "val = pd.read_parquet(\n", " \"gs://thesis-bucket-option-trade-classification/data/ise_log_standardized/val_set_20.parquet\",\n", " engine=\"fastparquet\",\n", - ")\n" + ")" ] }, { @@ -392,7 +391,7 @@ "outputs": [], "source": [ "y_train = train[\"buy_sell\"]\n", - "X_train = train.drop(columns=[\"buy_sell\"])\n" + "X_train = train.drop(columns=[\"buy_sell\"])" ] }, { @@ -402,7 +401,7 @@ "outputs": [], "source": [ "y_val = val[\"buy_sell\"]\n", - "X_val = val.drop(columns=[\"buy_sell\"])\n" + "X_val = val.drop(columns=[\"buy_sell\"])" ] }, { @@ -411,7 +410,7 @@ "metadata": {}, "outputs": [], "source": [ - "X_train.head()\n" + "X_train.head()" ] }, { @@ -431,7 +430,6 @@ " timestamp = np.linspace(0, 1, length)\n", " # keep weight fixed\n", " for strategy in [\"uniform\", \"exponential\"]:\n", - "\n", " if strategy == \"uniform\":\n", " weight = np.ones(length)\n", " else:\n", @@ -474,7 +472,7 @@ " \"strategy\": strategy,\n", " }\n", " print(res)\n", - " results_p.append(res)\n" + " results_p.append(res)" ] }, { @@ -483,7 +481,7 @@ "metadata": {}, "outputs": [], "source": [ - "results_df = pd.DataFrame(results_p)\n" + "results_df = pd.DataFrame(results_p)" ] }, { @@ -492,7 +490,7 @@ "metadata": {}, "outputs": [], "source": [ - "results_df\n" + "results_df" ] }, { @@ -501,7 +499,7 @@ "metadata": {}, "outputs": [], "source": [ - "results_df.to_csv(\"learning_curves_gbm_default_params.csv\")\n" + "results_df.to_csv(\"learning_curves_gbm_default_params.csv\")" ] }, { @@ -533,7 +531,7 @@ "data = pd.read_parquet(\n", " \"gs://thesis-bucket-option-trade-classification/data/classical_size_features_log_normalized/train_set_extended_60.parquet\",\n", " engine=\"fastparquet\",\n", - ")\n" + ")" ] }, { @@ -546,7 +544,7 @@ "source": [ "# try to predict last 10 % in training set using first 10 % of features. 
Accuracy should be above 50 %.\n", "label = data[\"buy_sell\"]\n", - "data.drop(columns=[\"buy_sell\"], inplace=True)\n" + "data.drop(columns=[\"buy_sell\"], inplace=True)" ] }, { @@ -563,7 +561,7 @@ "X_train = data.iloc[0 : len(data) // 10, :]\n", "X_test = data.iloc[-len(data) // 10 :, :]\n", "\n", - "del label, data\n" + "del label, data" ] }, { @@ -578,7 +576,7 @@ }, "outputs": [], "source": [ - "y_train.shape\n" + "y_train.shape" ] }, { @@ -599,7 +597,7 @@ " \"eval_metric\": \"Accuracy\",\n", " \"iterations\": 1000,\n", " \"early_stopping_rounds\": 100,\n", - "}\n" + "}" ] }, { @@ -610,7 +608,7 @@ }, "outputs": [], "source": [ - "columns = X_train.columns\n" + "columns = X_train.columns" ] }, { @@ -644,7 +642,7 @@ " model = CatBoostClassifier(**params)\n", " model.fit(X_train[[col]], y_train, eval_set=(X_test[[col]], y_test))\n", " acc = model.score(X_test[[col]], y_test)\n", - " results.append([col, acc])\n" + " results.append([col, acc])" ] }, { @@ -661,7 +659,7 @@ "outputs": [], "source": [ "results_df = pd.DataFrame(results, columns=[\"feature\", \"accuracy\"])\n", - "results_df.sort_values(by=\"accuracy\")\n" + "results_df.sort_values(by=\"accuracy\")" ] }, { diff --git a/notebooks/2.0b-mb-data-preprocessing-unsupervised.ipynb b/notebooks/2.0b-mb-data-preprocessing-unsupervised.ipynb index 3fa34286..653545e7 100644 --- a/notebooks/2.0b-mb-data-preprocessing-unsupervised.ipynb +++ b/notebooks/2.0b-mb-data-preprocessing-unsupervised.ipynb @@ -14,8 +14,7 @@ "import gcsfs\n", "import pandas as pd\n", "import wandb\n", - "\n", - "from tqdm.auto import tqdm\n" + "from tqdm.auto import tqdm" ] }, { @@ -28,7 +27,7 @@ "source": [ "exchange = \"ise\"\n", "strategy = \"unsupervised\"\n", - "max_i = 30 # number of partial files\n" + "max_i = 30 # number of partial files" ] }, { @@ -47,7 +46,7 @@ "source": [ "# connect to weights and biases\n", "run = wandb.init(project=\"thesis\", job_type=\"dataset-creation\", entity=\"fbv\")\n", - "dataset = wandb.Artifact(name=f\"{exchange}_{strategy}_raw\", type=\"preprocessed_data\")\n" + "dataset = wandb.Artifact(name=f\"{exchange}_{strategy}_raw\", type=\"preprocessed_data\")" ] }, { @@ -61,7 +60,7 @@ "source": [ "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"\n", "\n", - "fs = gcsfs.GCSFileSystem(project=\"thesis\")\n" + "fs = gcsfs.GCSFileSystem(project=\"thesis\")" ] }, { @@ -93,7 +92,7 @@ "source": [ "files = [\n", " f\"gs://thesis-bucket-option-trade-classification/data/preprocessed/{'matched' if strategy == 'supervised' else 'unmatched'}_{exchange}_quotes_min_mem_usage_extended_part_{i:04d}.parquet\"\n", - " for i in range(0, max_i)\n", + " for i in range(max_i)\n", "]\n", "\n", "columns = [\n", @@ -122,7 +121,7 @@ "dfs = [pd.read_parquet(gc_file, columns=columns) for gc_file in tqdm(files)]\n", "df = pd.concat(dfs)\n", "\n", - "del dfs\n" + "del dfs" ] }, { @@ -138,7 +137,7 @@ }, "outputs": [], "source": [ - "df.memory_usage(deep=True).sum()\n" + "df.memory_usage(deep=True).sum()" ] }, { @@ -149,7 +148,7 @@ }, "outputs": [], "source": [ - "df.sort_values(by=\"QUOTE_DATETIME\", inplace=True)\n" + "df.sort_values(by=\"QUOTE_DATETIME\", inplace=True)" ] }, { @@ -160,7 +159,7 @@ }, "outputs": [], "source": [ - "df.head()\n" + "df.head()" ] }, { @@ -175,7 +174,7 @@ "labelled_df = pd.read_parquet(\n", " f\"gs://thesis-bucket-option-trade-classification/data/preprocessed/{exchange}_supervised_train.parquet\",\n", " columns=columns,\n", - ")\n" + ")" ] }, { @@ -186,7 +185,7 @@ }, "outputs": [], "source": [ - "labelled_df.head()\n" + 
"labelled_df.head()" ] }, { @@ -201,7 +200,7 @@ "date_range = labelled_df.QUOTE_DATETIME.between(\n", " df.QUOTE_DATETIME.min(), df.QUOTE_DATETIME.max()\n", ")\n", - "labelled_df = labelled_df[date_range]\n" + "labelled_df = labelled_df[date_range]" ] }, { @@ -232,7 +231,7 @@ " # 'price_ex_lead', 'price_ex_lag',\n", "]\n", "\n", - "labelled_df[\"duplicated\"] = labelled_df.duplicated(subset=subset)\n" + "labelled_df[\"duplicated\"] = labelled_df.duplicated(subset=subset)" ] }, { @@ -243,7 +242,7 @@ }, "outputs": [], "source": [ - "labelled_df[\"duplicated\"].value_counts()\n" + "labelled_df[\"duplicated\"].value_counts()" ] }, { @@ -254,7 +253,7 @@ }, "outputs": [], "source": [ - "labelled_df[labelled_df[\"optionid\"] == 83414152.0].head(20).T\n" + "labelled_df[labelled_df[\"optionid\"] == 83414152.0].head(20).T" ] }, { @@ -265,7 +264,7 @@ }, "outputs": [], "source": [ - "labelled_df[\"index_labelled\"] = labelled_df.index\n" + "labelled_df[\"index_labelled\"] = labelled_df.index" ] }, { @@ -277,7 +276,7 @@ "outputs": [], "source": [ "len_labelled_df = len(labelled_df)\n", - "len_df = len(df)\n" + "len_df = len(df)" ] }, { @@ -314,7 +313,7 @@ " how=\"left\",\n", " indicator=\"exists\",\n", " suffixes=(\"_unlabelled\", \"_labelled\"),\n", - ")\n" + ")" ] }, { @@ -325,7 +324,7 @@ }, "outputs": [], "source": [ - "df_w_indicator.head(50)\n" + "df_w_indicator.head(50)" ] }, { @@ -338,7 +337,7 @@ "source": [ "# interpolate missing indices. index increases 1 -> 2. So filling with float seems ok. will be inserted between int of labelled df.\n", "df_w_indicator[\"index_labelled\"].interpolate(\"linear\", inplace=True)\n", - "df_w_indicator.set_index(keys=\"index_labelled\", drop=True, inplace=True)\n" + "df_w_indicator.set_index(keys=\"index_labelled\", drop=True, inplace=True)" ] }, { @@ -349,7 +348,7 @@ }, "outputs": [], "source": [ - "df_w_indicator.head()\n" + "df_w_indicator.head()" ] }, { @@ -360,7 +359,7 @@ }, "outputs": [], "source": [ - "len(df_w_indicator)\n" + "len(df_w_indicator)" ] }, { @@ -371,7 +370,7 @@ }, "outputs": [], "source": [ - "len(df)\n" + "len(df)" ] }, { @@ -382,7 +381,7 @@ }, "outputs": [], "source": [ - "len(labelled_df)\n" + "len(labelled_df)" ] }, { @@ -394,7 +393,7 @@ "outputs": [], "source": [ "# sort columns lexigraphically\n", - "df_w_indicator.sort_index(axis=1, inplace=True)\n" + "df_w_indicator.sort_index(axis=1, inplace=True)" ] }, { @@ -405,7 +404,7 @@ }, "outputs": [], "source": [ - "df_w_indicator[df_w_indicator[\"exists\"] == \"both\"].head(20).T\n" + "df_w_indicator[df_w_indicator[\"exists\"] == \"both\"].head(20).T" ] }, { @@ -426,7 +425,7 @@ " \"2013-04-24 00:00:00\", \"2013-10-24 16:14:48\"\n", ")\n", "\n", - "df_w_indicator = df_w_indicator[date_range]\n" + "df_w_indicator = df_w_indicator[date_range]" ] }, { @@ -437,7 +436,7 @@ }, "outputs": [], "source": [ - "df_w_indicator.head(5).T\n" + "df_w_indicator.head(5).T" ] }, { @@ -449,7 +448,7 @@ "outputs": [], "source": [ "# add fields\n", - "df_w_indicator[\"buy_sell\"] = 0\n" + "df_w_indicator[\"buy_sell\"] = 0" ] }, { @@ -469,7 +468,7 @@ " df_w_indicator[\"STRK_PRC\"],\n", " df_w_indicator[\"EXPIRATION\"],\n", " ]\n", - ")[\"TRADE_SIZE\"].transform(\"sum\")\n" + ")[\"TRADE_SIZE\"].transform(\"sum\")" ] }, { @@ -480,7 +479,7 @@ }, "outputs": [], "source": [ - "df_w_indicator\n" + "df_w_indicator" ] }, { @@ -504,7 +503,7 @@ " \"duplicated\",\n", " ]\n", ")\n", - "train.columns = train.columns.str.replace(r\"_unlabelled$\", \"\", regex=True)\n" + "train.columns = 
train.columns.str.replace(r\"_unlabelled$\", \"\", regex=True)" ] }, { @@ -515,7 +514,7 @@ }, "outputs": [], "source": [ - "train.head().T\n" + "train.head().T" ] }, { @@ -526,7 +525,7 @@ }, "outputs": [], "source": [ - "train.describe()\n" + "train.describe()" ] }, { @@ -544,7 +543,7 @@ "source": [ "output_path = f\"gs://thesis-bucket-option-trade-classification/data/preprocessed/{exchange}_{strategy}_train.parquet\"\n", "train.to_parquet(output_path)\n", - "dataset.add_reference(output_path, name=\"train_set\")\n" + "dataset.add_reference(output_path, name=\"train_set\")" ] }, { @@ -573,7 +572,7 @@ "source": [ "# Log the artifact to save it as an output of this run\n", "run.log_artifact(dataset)\n", - "wandb.finish()\n" + "wandb.finish()" ] } ], diff --git a/notebooks/3.0a-mb-explanatory-data-analysis.ipynb b/notebooks/3.0a-mb-explanatory-data-analysis.ipynb index a0e2d981..23caadf9 100644 --- a/notebooks/3.0a-mb-explanatory-data-analysis.ipynb +++ b/notebooks/3.0a-mb-explanatory-data-analysis.ipynb @@ -46,7 +46,7 @@ "plt.style.use(\"seaborn-notebook\")\n", "\n", "# set ratio of figure\n", - "ratio = (16, 9)\n" + "ratio = (16, 9)" ] }, { @@ -59,9 +59,7 @@ "source": [ "# set fixed seed\n", "def seed_everything(seed) -> None:\n", - " \"\"\"\n", - " Seeds basic parameters for reproducibility of results.\n", - " \"\"\"\n", + " \"\"\"Seeds basic parameters for reproducibility of results.\"\"\"\n", " os.environ[\"PYTHONHASHSEED\"] = str(seed)\n", " random.seed(seed)\n", " # pandas and numpy as discussed here: https://stackoverflow.com/a/52375474/5755604\n", @@ -69,7 +67,7 @@ "\n", "\n", "seed = 42\n", - "seed_everything(seed)\n" + "seed_everything(seed)" ] }, { @@ -91,7 +89,7 @@ "source": [ "data = pd.read_parquet(\n", " \"gs://thesis-bucket-option-trade-classification/data/preprocessed/train_set_extended_60.parquet\"\n", - ").sample(frac=0.1, axis=0, random_state=seed)\n" + ").sample(frac=0.1, axis=0, random_state=seed)" ] }, { @@ -207,7 +205,7 @@ }, "outputs": [], "source": [ - "data.head()\n" + "data.head()" ] }, { @@ -223,7 +221,7 @@ }, "outputs": [], "source": [ - "data.describe()\n" + "data.describe()" ] }, { @@ -238,7 +236,7 @@ }, "outputs": [], "source": [ - "data.info()\n" + "data.info()" ] }, { @@ -253,7 +251,7 @@ }, "outputs": [], "source": [ - "print(data.shape)\n" + "print(data.shape)" ] }, { @@ -271,7 +269,7 @@ "print(data.shape)\n", "# drop identical rows, if present\n", "data.drop_duplicates(inplace=True)\n", - "print(data.shape)\n" + "print(data.shape)" ] }, { @@ -297,7 +295,7 @@ }, "outputs": [], "source": [ - "data.nunique()\n" + "data.nunique()" ] }, { @@ -313,7 +311,7 @@ }, "outputs": [], "source": [ - "data.head().T\n" + "data.head().T" ] }, { @@ -357,7 +355,7 @@ "outputs": [], "source": [ "corr: pd.DataFrame = data.corr()\n", - "sns.heatmap(corr, xticklabels=corr.columns.values, yticklabels=corr.columns.values) \n" + "sns.heatmap(corr, xticklabels=corr.columns.values, yticklabels=corr.columns.values)" ] }, { @@ -401,7 +399,7 @@ " \"price_all_lead\",\n", " \"day_vol\",\n", " ],\n", - ")\n" + ")" ] }, { @@ -428,7 +426,7 @@ "source": [ "sort_criteria = corr[\"buy_sell\"].abs().sort_values(ascending=False)\n", "corr_target = corr.sort_values(\"buy_sell\", ascending=False)[\"buy_sell\"]\n", - "corr_target.loc[sort_criteria.index].to_frame()\n" + "corr_target.loc[sort_criteria.index].to_frame()" ] }, { @@ -452,7 +450,7 @@ "outputs": [], "source": [ "# remove some columns, which will NOT be used in model\n", - "data.drop(columns=[\"optionid\"], inplace=True)\n" + 
"data.drop(columns=[\"optionid\"], inplace=True)" ] }, { @@ -484,7 +482,7 @@ "# Find index of feature columns with correlation greater than 0.975\n", "to_drop = [column for column in upper.columns if any(abs(upper[column]) > threshold)]\n", "\n", - "print(to_drop)\n" + "print(to_drop)" ] }, { @@ -514,7 +512,7 @@ "\n", "# For each column, record the variables that are above the threshold\n", "for col in corr:\n", - " above_threshold_vars[col] = list(corr.index[corr[col] > threshold])\n" + " above_threshold_vars[col] = list(corr.index[corr[col] > threshold])" ] }, { @@ -529,7 +527,7 @@ }, "outputs": [], "source": [ - "pd.Series(above_threshold_vars)\n" + "pd.Series(above_threshold_vars)" ] }, { @@ -570,7 +568,7 @@ }, "outputs": [], "source": [ - "data.head()\n" + "data.head()" ] }, { @@ -581,7 +579,7 @@ }, "outputs": [], "source": [ - "sample = data.select_dtypes(include=np.number).fillna(0).drop(columns=[\"buy_sell\"])\n" + "sample = data.select_dtypes(include=np.number).fillna(0).drop(columns=[\"buy_sell\"])" ] }, { @@ -600,7 +598,7 @@ " learning_rate=\"auto\",\n", " n_iter=300,\n", ")\n", - "Y = tsne.fit_transform(sample)\n" + "Y = tsne.fit_transform(sample)" ] }, { @@ -612,7 +610,7 @@ "outputs": [], "source": [ "dims = pd.DataFrame(Y, columns=[\"x\", \"y\"], index=data.index)\n", - "dims[\"class\"] = data[\"buy_sell\"]\n" + "dims[\"class\"] = data[\"buy_sell\"]" ] }, { @@ -623,7 +621,7 @@ }, "outputs": [], "source": [ - "dims[\"class\"] = data.buy_sell\n" + "dims[\"class\"] = data.buy_sell" ] }, { @@ -643,7 +641,7 @@ "scatter = plt.scatter(dims[\"x\"], dims[\"y\"], c=dims[\"class\"], cmap=plt.cm.rainbow)\n", "plt.setp(ax, xticks=[], yticks=[])\n", "plt.title(\"t-SNE of dataset\")\n", - "plt.legend(handles=scatter.legend_elements()[0], labels=[\"-1\", \"1\"])\n" + "plt.legend(handles=scatter.legend_elements()[0], labels=[\"-1\", \"1\"])" ] }, { @@ -655,7 +653,7 @@ "outputs": [], "source": [ "del sample\n", - "del dims\n" + "del dims" ] }, { @@ -728,7 +726,7 @@ "data[\"day\"] = data[\"QUOTE_DATETIME\"].dt.day\n", "data[\"month\"] = data[\"QUOTE_DATETIME\"].dt.month\n", "data[\"year\"] = data[\"QUOTE_DATETIME\"].dt.year\n", - "data[\"date\"] = data[\"QUOTE_DATETIME\"].dt.date\n" + "data[\"date\"] = data[\"QUOTE_DATETIME\"].dt.date" ] }, { @@ -786,7 +784,7 @@ " \"ttm (6-12] month\",\n", " \"ttm > 12 month\",\n", "]\n", - "data[\"ttm_binned\"] = pd.cut(data[\"ttm\"], bins_ttm, labels=ttm_labels)\n" + "data[\"ttm_binned\"] = pd.cut(data[\"ttm\"], bins_ttm, labels=ttm_labels)" ] }, { @@ -849,7 +847,7 @@ "data[\"abs_mid_BEST\"] = data[\"TRADE_PRICE\"] - mid_best\n", "\n", "data[\"spread_ex\"] = spread_ex\n", - "data[\"spread_best\"] = spread_best\n" + "data[\"spread_best\"] = spread_best" ] }, { @@ -869,7 +867,7 @@ }, "outputs": [], "source": [ - "data[\"symbol_is_index\"] = data[\"ROOT\"].str.startswith(\"^\").astype(int)\n" + "data[\"symbol_is_index\"] = data[\"ROOT\"].str.startswith(\"^\").astype(int)" ] }, { @@ -885,7 +883,7 @@ }, "outputs": [], "source": [ - "data.head()\n" + "data.head()" ] }, { @@ -915,8 +913,7 @@ "outputs": [], "source": [ "def plot_kde_target(var_name: str, clip: List[float] | None = None):\n", - " \"\"\"\n", - " Plot kde plots for buys (+1) and sells (-1) with regard to the feature 'var_name'.\n", + " \"\"\"Plot kde plots for buys (+1) and sells (-1) with regard to the feature 'var_name'.\n", "\n", " Args:\n", " var_name (str): name of feature\n", @@ -945,7 +942,7 @@ " f\"The correlation between '{var_name}' and the 'buy_sell' is {corr_var: 0.4f}\"\n", " )\n", " 
print(f\"Median value of sells = {median_sell: 0.4f}\")\n", - " print(f\"Median value of buys = {median_buy: 0.4f}\")\n" + " print(f\"Median value of buys = {median_buy: 0.4f}\")" ] }, { @@ -961,8 +958,7 @@ " clip: float | None = None,\n", " years: List[int] = [2006, 2010, 2013],\n", ") -> None:\n", - " \"\"\"\n", - " Plot several kde plots side by side for the feature.\n", + " \"\"\"Plot several kde plots side by side for the feature.\n", "\n", " Args:\n", " var_name (str): name of the feature\n", @@ -988,7 +984,7 @@ " )\n", " ax[y].xaxis.label.set_text(str(year))\n", "\n", - " fig.legend()\n" + " fig.legend()" ] }, { @@ -1003,9 +999,7 @@ "\n", "\n", "def plot_recessions() -> None:\n", - " \"\"\"\n", - " Add recession indicator to plot and entry to legend.\n", - " \"\"\"\n", + " \"\"\"Add recession indicator to plot and entry to legend.\"\"\"\n", " l = 0\n", " month = relativedelta.relativedelta(months=+1)\n", " for date, val in us_rec[\"USREC\"].items():\n", @@ -1019,7 +1013,7 @@ " alpha=0.25,\n", " label=\"_\" * l + \"recession\",\n", " )\n", - " l += 1\n" + " l += 1" ] }, { @@ -1033,8 +1027,7 @@ "def plot_time_series(\n", " feature: str | List[str], aggregation: str | List[Any] = \"count\"\n", ") -> pd.DataFrame:\n", - " \"\"\"\n", - " Plot feature over time. Aggregate using 'aggregation'.\n", + " \"\"\"Plot feature over time. Aggregate using 'aggregation'.\n", "\n", " Args:\n", " feature (str | List[str]): features to plot.\n", @@ -1058,7 +1051,7 @@ " ax.legend()\n", " plt.show()\n", "\n", - " return time_series\n" + " return time_series" ] }, { @@ -1081,7 +1074,7 @@ "cat_columns_bin = [\"bin_\" + x for x in cat_columns]\n", "\n", "# binarize categorical similar to Borisov et al.\n", - "data[cat_columns_bin] = data[cat_columns].apply(lambda x: pd.factorize(x)[0]) \n" + "data[cat_columns_bin] = data[cat_columns].apply(lambda x: pd.factorize(x)[0])" ] }, { @@ -1124,7 +1117,7 @@ }, "outputs": [], "source": [ - "trades_per_day = plot_time_series(\"TRADE_PRICE\", \"count\")\n" + "trades_per_day = plot_time_series(\"TRADE_PRICE\", \"count\")" ] }, { @@ -1139,7 +1132,7 @@ }, "outputs": [], "source": [ - "trades_per_day.iloc[:, 0].nlargest(N)\n" + "trades_per_day.iloc[:, 0].nlargest(N)" ] }, { @@ -1154,7 +1147,7 @@ }, "outputs": [], "source": [ - "trades_per_day.iloc[:, 0].nsmallest(N)\n" + "trades_per_day.iloc[:, 0].nsmallest(N)" ] }, { @@ -1190,8 +1183,8 @@ }, "outputs": [], "source": [ - "ax = sns.histplot(data, x=\"TRADE_SIZE\", bins=50) \n", - "ax.title.set_text(\"Histogram of trade size\")\n" + "ax = sns.histplot(data, x=\"TRADE_SIZE\", bins=50)\n", + "ax.title.set_text(\"Histogram of trade size\")" ] }, { @@ -1218,7 +1211,7 @@ }, "outputs": [], "source": [ - "trades_over_time = plot_time_series(\"TRADE_SIZE\", [\"mean\", \"median\"])\n" + "trades_over_time = plot_time_series(\"TRADE_SIZE\", [\"mean\", \"median\"])" ] }, { @@ -1236,7 +1229,7 @@ "source": [ "trade_ask_bid_size = plot_time_series(\n", " [\"TRADE_SIZE\", \"ask_size_ex\", \"bid_size_ex\"], \"mean\"\n", - ")\n" + ")" ] }, { @@ -1263,7 +1256,7 @@ }, "outputs": [], "source": [ - "data[\"TRADE_SIZE\"].describe()\n" + "data[\"TRADE_SIZE\"].describe()" ] }, { @@ -1279,7 +1272,7 @@ }, "outputs": [], "source": [ - "data[data[\"TRADE_SIZE\"].max() == data[\"TRADE_SIZE\"]]\n" + "data[data[\"TRADE_SIZE\"].max() == data[\"TRADE_SIZE\"]]" ] }, { @@ -1295,7 +1288,7 @@ }, "outputs": [], "source": [ - "data.nlargest(N, \"TRADE_SIZE\", keep=\"first\").T\n" + "data.nlargest(N, \"TRADE_SIZE\", keep=\"first\").T" ] }, { @@ -1312,8 +1305,8 @@ 
"outputs": [], "source": [ "data[\"log_trade_size\"] = np.log1p(data[\"TRADE_SIZE\"])\n", - "ax = sns.histplot(data, x=\"log_trade_size\", bins=50) \n", - "ax.title.set_text(f\"Histogram of trade size (log1p)\")\n" + "ax = sns.histplot(data, x=\"log_trade_size\", bins=50)\n", + "ax.title.set_text(\"Histogram of trade size (log1p)\")" ] }, { @@ -1329,7 +1322,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"log_trade_size\", clip=[0, 6])\n" + "plot_kde_target(\"log_trade_size\", clip=[0, 6])" ] }, { @@ -1364,8 +1357,8 @@ }, "outputs": [], "source": [ - "ax = sns.histplot(data, x=\"TRADE_PRICE\", bins=50) \n", - "ax.title.set_text(\"Histogram of trade price\")\n" + "ax = sns.histplot(data, x=\"TRADE_PRICE\", bins=50)\n", + "ax.title.set_text(\"Histogram of trade price\")" ] }, { @@ -1382,7 +1375,7 @@ "outputs": [], "source": [ "ax = sns.boxplot(data=data, x=\"buy_sell\", y=\"TRADE_PRICE\")\n", - "ax.title.set_text(\"Box plot of 'TRADE_PRICE' for buys (1) and sells (-1)\")\n" + "ax.title.set_text(\"Box plot of 'TRADE_PRICE' for buys (1) and sells (-1)\")" ] }, { @@ -1405,7 +1398,7 @@ }, "outputs": [], "source": [ - "data[\"log_trade_price\"] = np.log1p(data[\"TRADE_PRICE\"])\n" + "data[\"log_trade_price\"] = np.log1p(data[\"TRADE_PRICE\"])" ] }, { @@ -1423,7 +1416,7 @@ "source": [ "fig, ax = plt.subplots()\n", "\n", - "sns.histplot(data, x=\"log_trade_price\", bins=50, stat=\"density\", label=\"log price\") \n", + "sns.histplot(data, x=\"log_trade_price\", bins=50, stat=\"density\", label=\"log price\")\n", "\n", "# extract the limits for the x-axis and fit normal distributon\n", "x0, x1 = ax.get_xlim()\n", @@ -1435,7 +1428,7 @@ "\n", "\n", "ax.title.set_text(\"Distribution of log prices\")\n", - "ax.legend()\n" + "ax.legend()" ] }, { @@ -1452,7 +1445,7 @@ "outputs": [], "source": [ "ax = sns.boxplot(data=data, x=\"buy_sell\", y=\"log_trade_price\")\n", - "ax.title.set_text(\"Box plot of log prices for buys (1) and sells (-1)\")\n" + "ax.title.set_text(\"Box plot of log prices for buys (1) and sells (-1)\")" ] }, { @@ -1468,7 +1461,7 @@ }, "outputs": [], "source": [ - "data.nlargest(N, \"TRADE_PRICE\", keep=\"first\").T\n" + "data.nlargest(N, \"TRADE_PRICE\", keep=\"first\").T" ] }, { @@ -1484,7 +1477,7 @@ }, "outputs": [], "source": [ - "trade_price_over_time = plot_time_series(\"TRADE_PRICE\", [\"mean\", \"median\"])\n" + "trade_price_over_time = plot_time_series(\"TRADE_PRICE\", [\"mean\", \"median\"])" ] }, { @@ -1502,7 +1495,7 @@ "source": [ "trade_price_over_time = plot_time_series(\n", " [\"TRADE_PRICE\", \"price_ex_lead\", \"price_ex_lag\"], \"mean\"\n", - ")\n" + ")" ] }, { @@ -1520,7 +1513,7 @@ "source": [ "trade_price_over_time = plot_time_series(\n", " [\"TRADE_PRICE\", \"price_ex_lead\", \"price_ex_lag\"], \"median\"\n", - ")\n" + ")" ] }, { @@ -1556,7 +1549,7 @@ }, "outputs": [], "source": [ - "ttm_over_time = plot_time_series(\"ttm\", \"mean\")\n" + "ttm_over_time = plot_time_series(\"ttm\", \"mean\")" ] }, { @@ -1572,8 +1565,8 @@ }, "outputs": [], "source": [ - "ax = sns.histplot(data=data[data[\"bid_ex\"] == 0.0], x=\"ttm\", bins=50) \n", - "ax.title.set_text(\"Count of transactions with regard to time to maturity (months)\")\n" + "ax = sns.histplot(data=data[data[\"bid_ex\"] == 0.0], x=\"ttm\", bins=50)\n", + "ax.title.set_text(\"Count of transactions with regard to time to maturity (months)\")" ] }, { @@ -1601,7 +1594,7 @@ "outputs": [], "source": [ "# TODO: ask of zero plausible?\n", - "sns.histplot(data=data[data[\"ask_ex\"] == 0.0], x=\"ttm\", bins=50) \n" + 
"sns.histplot(data=data[data[\"ask_ex\"] == 0.0], x=\"ttm\", bins=50)" ] }, { @@ -1626,8 +1619,8 @@ }, "outputs": [], "source": [ - "ax = sns.histplot(data, x=\"STRK_PRC\", bins=50) \n", - "ax.title.set_text(\"Histogram of strike price\")\n" + "ax = sns.histplot(data, x=\"STRK_PRC\", bins=50)\n", + "ax.title.set_text(\"Histogram of strike price\")" ] }, { @@ -1644,7 +1637,7 @@ "outputs": [], "source": [ "ax = sns.boxplot(data=data, x=\"buy_sell\", y=\"STRK_PRC\")\n", - "ax.title.set_text(\"Box plot of strike prices for buys (1) and sells (-1)\")\n" + "ax.title.set_text(\"Box plot of strike prices for buys (1) and sells (-1)\")" ] }, { @@ -1660,7 +1653,7 @@ }, "outputs": [], "source": [ - "strike_over_time = plot_time_series(\"STRK_PRC\", \"mean\")\n" + "strike_over_time = plot_time_series(\"STRK_PRC\", \"mean\")" ] }, { @@ -1683,7 +1676,7 @@ }, "outputs": [], "source": [ - "data[\"log_strk_prc\"] = np.log1p(data[\"STRK_PRC\"])\n" + "data[\"log_strk_prc\"] = np.log1p(data[\"STRK_PRC\"])" ] }, { @@ -1699,8 +1692,8 @@ }, "outputs": [], "source": [ - "ax = sns.histplot(data, x=\"log_strk_prc\", bins=50) \n", - "ax.title.set_text(\"Histogram of strike price (log1p)\")\n" + "ax = sns.histplot(data, x=\"log_strk_prc\", bins=50)\n", + "ax.title.set_text(\"Histogram of strike price (log1p)\")" ] }, { @@ -1717,7 +1710,7 @@ "outputs": [], "source": [ "ax = sns.boxplot(data=data, x=\"buy_sell\", y=\"log_strk_prc\")\n", - "ax.title.set_text(\"Box plot of strike prices for buys (1) and sells (-1)\")\n" + "ax.title.set_text(\"Box plot of strike prices for buys (1) and sells (-1)\")" ] }, { @@ -1742,7 +1735,7 @@ "outputs": [], "source": [ "ratio_buy_sell = data[\"buy_sell\"].value_counts() / data[\"buy_sell\"].count()\n", - "ratio_buy_sell.head()\n" + "ratio_buy_sell.head()" ] }, { @@ -1782,7 +1775,7 @@ "source": [ "ax = sns.countplot(data=data, x=\"OPTION_TYPE\", hue=\"buy_sell\")\n", "ax.title.set_text(\"Distribution of Buy / Sell indicator with regard to option type\")\n", - "sns.move_legend(ax, \"lower center\", bbox_to_anchor=(0.5, -0.3))\n" + "sns.move_legend(ax, \"lower center\", bbox_to_anchor=(0.5, -0.3))" ] }, { @@ -1811,7 +1804,7 @@ "ax.title.set_text(\"Distribution of Buy / Sell indicator with regard to year (binned)\")\n", "ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha=\"center\")\n", "plt.tight_layout()\n", - "plt.show()\n" + "plt.show()" ] }, { @@ -1842,7 +1835,7 @@ ")\n", "ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha=\"center\")\n", "plt.tight_layout()\n", - "plt.show()\n" + "plt.show()" ] }, { @@ -1859,7 +1852,7 @@ "outputs": [], "source": [ "ax = sns.scatterplot(data=sample, x=\"ttm\", y=\"bid_ex\", hue=\"OPTION_TYPE\")\n", - "ax.title.set_text(\"Scatter plot of time to maturity (months) and bid (ex)\")\n" + "ax.title.set_text(\"Scatter plot of time to maturity (months) and bid (ex)\")" ] }, { @@ -1875,8 +1868,8 @@ }, "outputs": [], "source": [ - "ax = sns.histplot(data=data[data[\"bid_ex\"] == 0.0], x=\"ttm\", bins=50) \n", - "ax.title.set_text(\"Count of transactions with regard to time to maturity (months)\")\n" + "ax = sns.histplot(data=data[data[\"bid_ex\"] == 0.0], x=\"ttm\", bins=50)\n", + "ax.title.set_text(\"Count of transactions with regard to time to maturity (months)\")" ] }, { @@ -1893,7 +1886,7 @@ "outputs": [], "source": [ "# TODO: ask of zero plausible?\n", - "sns.histplot(data=data[data[\"ask_ex\"] == 0.0], x=\"ttm\", bins=50) \n" + "sns.histplot(data=data[data[\"ask_ex\"] == 0.0], x=\"ttm\", bins=50)" ] }, { @@ -1930,7 +1923,7 @@ ")\n", 
"plot_recessions()\n", "ax.legend()\n", - "plt.show()\n" + "plt.show()" ] }, { @@ -1967,7 +1960,7 @@ "frequency_symbols = data[\"ROOT\"].value_counts().reset_index(name=\"Count\")\n", "frequency_symbols.rename(columns={\"index\": \"Symbol\"}, inplace=True)\n", "frequency_symbols.sort_values(\"Count\", ascending=True)\n", - "sns.histplot(data=frequency_symbols, x=\"Count\", bins=200)\n" + "sns.histplot(data=frequency_symbols, x=\"Count\", bins=200)" ] }, { @@ -1982,7 +1975,7 @@ }, "outputs": [], "source": [ - "frequency_symbols[frequency_symbols[\"Count\"] <= 5].count()\n" + "frequency_symbols[frequency_symbols[\"Count\"] <= 5].count()" ] }, { @@ -1998,7 +1991,7 @@ }, "outputs": [], "source": [ - "frequency_symbols\n" + "frequency_symbols" ] }, { @@ -2020,7 +2013,7 @@ "ax = sns.barplot(data=most_frequent_symbols, x=\"Symbol\", y=\"Count\")\n", "ax.title.set_text(f\"{N} most frequently traded symbols\")\n", "\n", - "most_frequent_symbols.head(N)\n" + "most_frequent_symbols.head(N)" ] }, { @@ -2031,7 +2024,7 @@ }, "outputs": [], "source": [ - "list_freq_symbols = most_frequent_symbols.Symbol.tolist()\n" + "list_freq_symbols = most_frequent_symbols.Symbol.tolist()" ] }, { @@ -2042,7 +2035,7 @@ }, "outputs": [], "source": [ - "frequent_symbols_over_time = data[data[\"ROOT\"].isin(list_freq_symbols)]\n" + "frequent_symbols_over_time = data[data[\"ROOT\"].isin(list_freq_symbols)]" ] }, { @@ -2060,7 +2053,7 @@ " .count()\n", " .reset_index()\n", " .rename(columns={\"TRADE_SIZE\": \"count\", \"QUOTE_DATETIME\": \"date\", \"ROOT\": \"Symbol\"})\n", - ")\n" + ")" ] }, { @@ -2075,7 +2068,7 @@ " frequent_symbols_trades_per_day.groupby([\"date\", \"Symbol\"])[\"count\"]\n", " .first()\n", " .unstack()\n", - ")\n" + ")" ] }, { @@ -2093,7 +2086,7 @@ "source": [ "frequent_symbols_over_time.plot(\n", " kind=\"line\", title=f\"{N} most frequently traded underlyings over time\"\n", - ")\n" + ")" ] }, { @@ -2117,7 +2110,7 @@ " columns=[\"QUOTE_DATETIME\", \"ROOT\"],\n", ")\n", "\n", - "roots_over_time = pd.concat([root_time_train, root_time_val, root_time_test])\n" + "roots_over_time = pd.concat([root_time_train, root_time_val, root_time_test])" ] }, { @@ -2135,7 +2128,7 @@ " .reset_index()\n", " .sample(N)\n", " .T\n", - ")\n" + ")" ] }, { @@ -2158,7 +2151,7 @@ " ),\n", " i / N,\n", " np.nan,\n", - " )\n" + " )" ] }, { @@ -2180,7 +2173,7 @@ "# beginning of validation and test set\n", "ax.axvline(\"2013-10-25\", color=\"gray\")\n", "ax.axvline(\"2015-11-06\", color=\"gray\")\n", - "ax.set_title(\"roots over time (min / max appearance)\")\n" + "ax.set_title(\"roots over time (min / max appearance)\")" ] }, { @@ -2201,7 +2194,7 @@ "ax.title.set_text(\n", " \"Distribution of Buy / Sell indicator with regard to whether underlying is an index\"\n", ")\n", - "sns.move_legend(ax, \"lower center\", bbox_to_anchor=(0.5, -0.3))\n" + "sns.move_legend(ax, \"lower center\", bbox_to_anchor=(0.5, -0.3))" ] }, { @@ -2220,7 +2213,7 @@ " data.groupby([\"symbol_is_index\", \"buy_sell\"])[\"buy_sell\"].count()\n", " / data.groupby([\"symbol_is_index\"])[\"buy_sell\"].count()\n", ")\n", - "ratios_is_index.head()\n" + "ratios_is_index.head()" ] }, { @@ -2246,7 +2239,7 @@ }, "outputs": [], "source": [ - "data[\"issue_type\"].value_counts(dropna=False)\n" + "data[\"issue_type\"].value_counts(dropna=False)" ] }, { @@ -2264,7 +2257,7 @@ "source": [ "ax = sns.countplot(data=data, x=\"issue_type\")\n", "ax.title.set_text(\"No. 
of transactions by issue type\")\n", - "ax.xaxis.label.set_text(\"issue type\")\n" + "ax.xaxis.label.set_text(\"issue type\")" ] }, { @@ -2291,7 +2284,7 @@ "source": [ "bid_ask_over_time = plot_time_series(\n", " [\"bid_ex\", \"ask_ex\", \"BEST_ASK\", \"BEST_BID\"], \"mean\"\n", - ")\n" + ")" ] }, { @@ -2316,8 +2309,8 @@ }, "outputs": [], "source": [ - "ax = sns.histplot(data, x=\"ask_ex\", bins=50) \n", - "ax.title.set_text(\"Histogram of ask (exchange)\")\n" + "ax = sns.histplot(data, x=\"ask_ex\", bins=50)\n", + "ax.title.set_text(\"Histogram of ask (exchange)\")" ] }, { @@ -2344,8 +2337,8 @@ "outputs": [], "source": [ "data[\"log_ask_ex\"] = np.log1p(data[\"ask_ex\"])\n", - "ax = sns.histplot(data, x=\"log_ask_ex\", bins=50) \n", - "ax.title.set_text(f\"Histogram of ask exchange (log1p)\")\n" + "ax = sns.histplot(data, x=\"log_ask_ex\", bins=50)\n", + "ax.title.set_text(\"Histogram of ask exchange (log1p)\")" ] }, { @@ -2361,7 +2354,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"log_ask_ex\", clip=[0, 5])\n" + "plot_kde_target(\"log_ask_ex\", clip=[0, 5])" ] }, { @@ -2396,8 +2389,8 @@ }, "outputs": [], "source": [ - "ax = sns.histplot(data, x=\"bid_ex\", bins=50) \n", - "ax.title.set_text(\"Histogram of bid (exchange)\")\n" + "ax = sns.histplot(data, x=\"bid_ex\", bins=50)\n", + "ax.title.set_text(\"Histogram of bid (exchange)\")" ] }, { @@ -2414,8 +2407,8 @@ "outputs": [], "source": [ "data[\"log_bid_ex\"] = np.log1p(data[\"bid_ex\"])\n", - "ax = sns.histplot(data, x=\"log_bid_ex\", bins=50) \n", - "ax.title.set_text(f\"Histogram of bid exchange (log1p)\")\n" + "ax = sns.histplot(data, x=\"log_bid_ex\", bins=50)\n", + "ax.title.set_text(\"Histogram of bid exchange (log1p)\")" ] }, { @@ -2431,7 +2424,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"log_bid_ex\", clip=[0, 5])\n" + "plot_kde_target(\"log_bid_ex\", clip=[0, 5])" ] }, { @@ -2448,8 +2441,8 @@ "outputs": [], "source": [ "data[\"log_bid_ex\"] = np.log1p(data[\"bid_ex\"])\n", - "ax = sns.histplot(data, x=\"log_bid_ex\", bins=50) \n", - "ax.title.set_text(\"Histogram of bid exchange (log1p)\")\n" + "ax = sns.histplot(data, x=\"log_bid_ex\", bins=50)\n", + "ax.title.set_text(\"Histogram of bid exchange (log1p)\")" ] }, { @@ -2465,7 +2458,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"log_bid_ex\", clip=[-5, 6])\n" + "plot_kde_target(\"log_bid_ex\", clip=[-5, 6])" ] }, { @@ -2507,15 +2500,13 @@ "outputs": [], "source": [ "def visualize_nan():\n", - " \"\"\"\n", - " Visualize NaN values in a heatmap to learn about patterns.\n", - " \"\"\"\n", + " \"\"\"Visualize NaN values in a heatmap to learn about patterns.\"\"\"\n", " plt.subplots()\n", " sns.heatmap(data.head(50).isnull(), cbar=False)\n", " plt.xlabel(\"feature\")\n", " plt.ylabel(\"row\")\n", " plt.title(\"Missing values (colored in bright beige)\")\n", - " plt.show()\n" + " plt.show()" ] }, { @@ -2531,7 +2522,7 @@ }, "outputs": [], "source": [ - "visualize_nan()\n" + "visualize_nan()" ] }, { @@ -2557,7 +2548,7 @@ " xlabel=\"No. of missing values\",\n", " ylabel=\"feature\",\n", " title=\"Missing values\",\n", - ")\n" + ")" ] }, { @@ -2584,7 +2575,7 @@ " title=\"Missing values over time\",\n", " xlabel=\"Timestamp\",\n", " ylabel=\"No. 
of missing values\",\n", - ")\n" + ")" ] }, { @@ -2612,7 +2603,7 @@ "\n", "fig, ax = plt.subplots(figsize=(9, 9))\n", "ax = sns.heatmap(corr_mat, mask=mask, annot=False, annot_kws={\"size\": 10}, ax=ax)\n", - "ax.title.set_text(\"Correlation between missing features\")\n" + "ax.title.set_text(\"Correlation between missing features\")" ] }, { @@ -2652,7 +2643,7 @@ "ax0.set_aspect(\"auto\")\n", "ax0.set_title(\"Dendrogram of missing values\")\n", "\n", - "ax0\n" + "ax0" ] }, { @@ -2699,7 +2690,7 @@ "source": [ "corr = data.corr()\n", "\n", - "sns.heatmap(corr, xticklabels=corr.columns.values, yticklabels=corr.columns.values) \n" + "sns.heatmap(corr, xticklabels=corr.columns.values, yticklabels=corr.columns.values)" ] }, { @@ -2726,7 +2717,7 @@ "source": [ "sort_criteria = corr[\"buy_sell\"].abs().sort_values(ascending=False)\n", "corr_target = corr.sort_values(\"buy_sell\", ascending=False)[\"buy_sell\"]\n", - "corr_target.loc[sort_criteria.index].to_frame()\n" + "corr_target.loc[sort_criteria.index].to_frame()" ] }, { @@ -2749,7 +2740,7 @@ "# Find index of feature columns with correlation greater than 0.95\n", "to_drop = [column for column in upper.columns if any(abs(upper[column]) > 0.975)]\n", "\n", - "print(to_drop)\n" + "print(to_drop)" ] }, { @@ -2794,7 +2785,7 @@ "for col in corr:\n", " above_threshold_vars[col] = list(corr.index[corr[col] > threshold])\n", "\n", - "pd.Series(above_threshold_vars)\n" + "pd.Series(above_threshold_vars)" ] }, { @@ -2819,7 +2810,7 @@ }, "outputs": [], "source": [ - "corr_target.loc[sort_criteria.index].to_frame().T\n" + "corr_target.loc[sort_criteria.index].to_frame().T" ] }, { @@ -2855,7 +2846,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"prox_ex\", clip=[-2, 2])\n" + "plot_kde_target(\"prox_ex\", clip=[-2, 2])" ] }, { @@ -2871,7 +2862,7 @@ }, "outputs": [], "source": [ - "plot_kde_target_comparsion(\"prox_ex\", years=[2006, 2010, 2013], clip=[-2, 2])\n" + "plot_kde_target_comparsion(\"prox_ex\", years=[2006, 2010, 2013], clip=[-2, 2])" ] }, { @@ -2898,7 +2889,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"bid_ask_size_ratio_ex\", clip=[0, 100])\n" + "plot_kde_target(\"bid_ask_size_ratio_ex\", clip=[0, 100])" ] }, { @@ -2916,7 +2907,7 @@ "source": [ "plot_kde_target_comparsion(\n", " \"bid_ask_size_ratio_ex\", years=[2006, 2010, 2013], clip=[0, 100]\n", - ")\n" + ")" ] }, { @@ -2941,7 +2932,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"log_bid_ex\")\n" + "plot_kde_target(\"log_bid_ex\")" ] }, { @@ -2957,7 +2948,7 @@ }, "outputs": [], "source": [ - "plot_kde_target_comparsion(\"log_bid_ex\", years=[2006, 2010, 2013])\n" + "plot_kde_target_comparsion(\"log_bid_ex\", years=[2006, 2010, 2013])" ] }, { @@ -2992,7 +2983,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"log_trade_price\")\n" + "plot_kde_target(\"log_trade_price\")" ] }, { @@ -3008,7 +2999,7 @@ }, "outputs": [], "source": [ - "plot_kde_target_comparsion(\"log_trade_price\", years=[2006, 2010, 2013])\n" + "plot_kde_target_comparsion(\"log_trade_price\", years=[2006, 2010, 2013])" ] }, { @@ -3043,7 +3034,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"ask_size_ex\", clip=[0, 2000])\n" + "plot_kde_target(\"ask_size_ex\", clip=[0, 2000])" ] }, { @@ -3059,7 +3050,7 @@ }, "outputs": [], "source": [ - "plot_kde_target_comparsion(\"ask_size_ex\", years=[2006, 2010, 2013], clip=[0, 2000])\n" + "plot_kde_target_comparsion(\"ask_size_ex\", years=[2006, 2010, 2013], clip=[0, 2000])" ] }, { @@ -3084,7 +3075,7 @@ }, "outputs": [], "source": [ - 
"plot_kde_target(\"bid_size_ex\", clip=[0, 1000])\n" + "plot_kde_target(\"bid_size_ex\", clip=[0, 1000])" ] }, { @@ -3100,7 +3091,7 @@ }, "outputs": [], "source": [ - "plot_kde_target_comparsion(\"bid_size_ex\", years=[2006, 2010, 2013], clip=[0, 1000])\n" + "plot_kde_target_comparsion(\"bid_size_ex\", years=[2006, 2010, 2013], clip=[0, 1000])" ] }, { @@ -3125,7 +3116,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"abs_mid_ex\", clip=[-0.5, 0.5])\n" + "plot_kde_target(\"abs_mid_ex\", clip=[-0.5, 0.5])" ] }, { @@ -3141,7 +3132,7 @@ }, "outputs": [], "source": [ - "plot_kde_target_comparsion(\"abs_mid_ex\", years=[2006, 2010, 2013], clip=[-0.5, 0.5])\n" + "plot_kde_target_comparsion(\"abs_mid_ex\", years=[2006, 2010, 2013], clip=[-0.5, 0.5])" ] }, { @@ -3177,7 +3168,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"day\")\n" + "plot_kde_target(\"day\")" ] }, { @@ -3193,7 +3184,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"year\")\n" + "plot_kde_target(\"year\")" ] }, { @@ -3239,7 +3230,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"chg_ex_lead\", clip=[-5, 5])\n" + "plot_kde_target(\"chg_ex_lead\", clip=[-5, 5])" ] }, { @@ -3255,7 +3246,7 @@ }, "outputs": [], "source": [ - "plot_kde_target_comparsion(\"chg_ex_lead\", years=[2006, 2010, 2013], clip=[-5, 5])\n" + "plot_kde_target_comparsion(\"chg_ex_lead\", years=[2006, 2010, 2013], clip=[-5, 5])" ] }, { @@ -3291,7 +3282,7 @@ }, "outputs": [], "source": [ - "plot_kde_target_comparsion(\"chg_ex_lag\", years=[2006, 2010, 2013], clip=[-5, 5])\n" + "plot_kde_target_comparsion(\"chg_ex_lag\", years=[2006, 2010, 2013], clip=[-5, 5])" ] }, { @@ -3307,7 +3298,7 @@ }, "outputs": [], "source": [ - "plot_kde_target_comparsion(\"chg_ex_lag\", years=[2006, 2010, 2013], clip=[-5, 5])\n" + "plot_kde_target_comparsion(\"chg_ex_lag\", years=[2006, 2010, 2013], clip=[-5, 5])" ] }, { @@ -3334,7 +3325,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"log_bid_ex\", clip=[-5, 8])\n" + "plot_kde_target(\"log_bid_ex\", clip=[-5, 8])" ] }, { @@ -3345,7 +3336,7 @@ }, "outputs": [], "source": [ - "data.replace([np.inf, -np.inf], np.nan, inplace=True)\n" + "data.replace([np.inf, -np.inf], np.nan, inplace=True)" ] }, { @@ -3359,7 +3350,7 @@ "scaler = StandardScaler()\n", "data[\"log_bid_ex_scaled\"] = scaler.fit_transform(\n", " X=data[\"log_bid_ex\"].values.reshape(-1, 1)\n", - ")\n" + ")" ] }, { @@ -3374,7 +3365,7 @@ }, "outputs": [], "source": [ - "data[\"log_bid_ex_scaled\"].describe()\n" + "data[\"log_bid_ex_scaled\"].describe()" ] }, { @@ -3390,7 +3381,7 @@ }, "outputs": [], "source": [ - "plot_kde_target(\"log_bid_ex_scaled\", clip=[-5, 5])\n" + "plot_kde_target(\"log_bid_ex_scaled\", clip=[-5, 5])" ] }, { diff --git a/notebooks/3.0b-mb-explanatory-matched-unmatched.ipynb b/notebooks/3.0b-mb-explanatory-matched-unmatched.ipynb index a901d8bb..48aba670 100644 --- a/notebooks/3.0b-mb-explanatory-matched-unmatched.ipynb +++ b/notebooks/3.0b-mb-explanatory-matched-unmatched.ipynb @@ -9,25 +9,20 @@ "source": [ "from __future__ import annotations\n", "\n", - "import wandb\n", - "from pathlib import Path\n", - "import pandas as pd\n", - "\n", "import os\n", - "\n", - "from otc.features.build_features import (\n", - " features_classical_size,\n", - ")\n", - "\n", - "\n", + "from pathlib import Path\n", "from typing import List\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", + "import wandb\n", + "from tqdm.auto import tqdm\n", "\n", - "from tqdm.auto import 
tqdm" + "from otc.features.build_features import (\n", + " features_classical_size,\n", + ")" ] }, { @@ -52,7 +47,7 @@ "\n", "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"\n", "\n", - "run = wandb.init(project=\"thesis\", entity=\"fbv\")\n" + "run = wandb.init(project=\"thesis\", entity=\"fbv\")" ] }, { @@ -74,12 +69,14 @@ "artifact_dir_unlabelled = artifact_unlabelled.download()\n", "\n", "x_train_unlabelled = pd.read_parquet(\n", - " Path(artifact_dir_unlabelled, \"train_set.parquet\"), columns=[\"buy_sell\", *features_classical_size]\n", - " )\n", + " Path(artifact_dir_unlabelled, \"train_set.parquet\"),\n", + " columns=[\"buy_sell\", *features_classical_size],\n", + ")\n", "\n", "# labelled data\n", "x_train_labelled = pd.read_parquet(\n", - " Path(artifact_dir_labelled, \"train_set.parquet\"), columns=[\"buy_sell\", *features_classical_size]\n", + " Path(artifact_dir_labelled, \"train_set.parquet\"),\n", + " columns=[\"buy_sell\", *features_classical_size],\n", ")\n", "\n", "x_train_unlabelled[\"src\"] = \"unlabelled\"\n", @@ -139,7 +136,7 @@ "outputs": [], "source": [ "# slice to same time range as unlabelled trades\n", - "x_train_labelled = x_train_labelled.iloc[27248577 : 29510319]" + "x_train_labelled = x_train_labelled.iloc[27248577:29510319]" ] }, { @@ -176,18 +173,16 @@ "outputs": [], "source": [ "def plot_kde_src(var_name: str, clip: List[float] | None = None):\n", - " \"\"\"\n", - " Plot kde plots for labelled and unlabelled with regard to the feature 'var_name'.\n", + " \"\"\"Plot kde plots for labelled and unlabelled with regard to the feature 'var_name'.\n", "\n", " Args:\n", " var_name (str): name of feature\n", " clip (List[float] | None, optional): clipping range. Defaults to None.\n", " \"\"\"\n", - "\n", - " quantiles = np.linspace(.1, 1, 9, 0)\n", + " quantiles = np.linspace(0.1, 1, 9, 0)\n", " stats_unlabelled = data[data[\"src\"] == \"unlabelled\"][var_name].quantile(quantiles)\n", " stats_labelled = data[data[\"src\"] == \"labelled\"][var_name].quantile(quantiles)\n", - " \n", + "\n", " _, ax = plt.subplots()\n", " for i in [\"unlabelled\", \"labelled\"]:\n", " sns.kdeplot(\n", @@ -203,7 +198,9 @@ " sns.move_legend(ax, \"lower center\", bbox_to_anchor=(0.5, -0.3))\n", " plt.show()\n", "\n", - " stats = pd.concat([stats_unlabelled, stats_labelled], keys=[\"unlabelled\", \"labelled\"], axis=1)\n", + " stats = pd.concat(\n", + " [stats_unlabelled, stats_labelled], keys=[\"unlabelled\", \"labelled\"], axis=1\n", + " )\n", " print(stats)" ] }, @@ -256,8 +253,10 @@ "source": [ "def plot_hist(unlabelled, labelled, title):\n", " fig, ax = plt.subplots()\n", - " ax.hist(unlabelled, bins=50, alpha=0.5, label='unlabelled', density=True, range=[-2,2])\n", - " ax.hist(labelled, bins=50, alpha=0.5, label='labelled', density=True, range=[-2,2])\n", + " ax.hist(\n", + " unlabelled, bins=50, alpha=0.5, label=\"unlabelled\", density=True, range=[-2, 2]\n", + " )\n", + " ax.hist(labelled, bins=50, alpha=0.5, label=\"labelled\", density=True, range=[-2, 2])\n", " plt.title(title)\n", " plt.legend()\n", " plt.show()" @@ -398,7 +397,7 @@ "source": [ "var_name = \"spread\"\n", "\n", - "data[var_name] = data[\"ask_ex\"] - data[\"bid_ex\"] " + "data[var_name] = data[\"ask_ex\"] - data[\"bid_ex\"]" ] }, { @@ -445,7 +444,7 @@ "outputs": [], "source": [ "var_name = \"prc_delta\"\n", - "data[var_name] = (data[\"TRADE_PRICE\"] - data[\"price_ex_lead\"])" + "data[var_name] = data[\"TRADE_PRICE\"] - data[\"price_ex_lead\"]" ] }, { diff --git 
a/notebooks/3.0c-feature-engineering.ipynb b/notebooks/3.0c-feature-engineering.ipynb index 842e4f5b..88d1e553 100644 --- a/notebooks/3.0c-feature-engineering.ipynb +++ b/notebooks/3.0c-feature-engineering.ipynb @@ -18,15 +18,16 @@ "import gcsfs\n", "import google.auth\n", "import numpy as np\n", - "import numpy.typing as npt\n", "import pandas as pd\n", "import wandb\n", - "from catboost import CatBoostClassifier, Pool\n", + "from catboost import CatBoostClassifier\n", "from sklearn.exceptions import NotFittedError\n", "from sklearn.metrics import matthews_corrcoef\n", "from sklearn.model_selection import train_test_split\n", - "from sklearn.preprocessing import (OrdinalEncoder, PowerTransformer,\n", - " RobustScaler, StandardScaler)\n", + "from sklearn.preprocessing import (\n", + " OrdinalEncoder,\n", + " StandardScaler,\n", + ")\n", "from tqdm.auto import tqdm" ] }, @@ -39,7 +40,7 @@ "outputs": [], "source": [ "credentials, _ = google.auth.default()\n", - "fs = gcsfs.GCSFileSystem(project=\"thesis\", token=credentials)\n" + "fs = gcsfs.GCSFileSystem(project=\"thesis\", token=credentials)" ] }, { @@ -57,7 +58,7 @@ "outputs": [], "source": [ "# connect to weights and biases\n", - "run = wandb.init(project=\"thesis\", job_type=\"dataset-creation\", entity=\"fbv\")\n" + "run = wandb.init(project=\"thesis\", job_type=\"dataset-creation\", entity=\"fbv\")" ] }, { @@ -73,7 +74,7 @@ "\n", "exchange = \"ise\" # \"ise\" # \"cboe\"\n", "strategy = \"supervised\" # \"supervised\" #\"unsupervised\" # \"supervised\" # \"transfer\" # \"unsupervised\"\n", - "mode = \"none\" # \"none\" # \"log_standardized\"\n" + "mode = \"none\" # \"none\" # \"log_standardized\"" ] }, { @@ -92,7 +93,7 @@ "\n", "# load unscaled data\n", "artifact = run.use_artifact(dataset)\n", - "data_dir = artifact.download()\n" + "data_dir = artifact.download()" ] }, { @@ -127,7 +128,7 @@ " \"buy_sell\",\n", " \"day_vol\",\n", " \"myn\",\n", - "]\n" + "]" ] }, { @@ -161,7 +162,7 @@ " # load test set\n", " test = pd.read_parquet(\n", " Path(data_dir, \"test_set\"), engine=\"fastparquet\", columns=columns\n", - " )\n" + " )" ] }, { @@ -189,7 +190,7 @@ " \"price_ex_lag\",\n", " \"day_vol\",\n", " \"myn\",\n", - "]\n" + "]" ] }, { @@ -307,15 +308,14 @@ " data: pd.DataFrame,\n", " mode: Literal[\"log_standarized\", \"none\"] = \"log_standardized\",\n", ") -> pd.DataFrame:\n", - " \"\"\"\n", - " Create features, impute, and scale.\n", + " \"\"\"Create features, impute, and scale.\n", "\n", " Args:\n", " data (pd.DataFrame): input data frame.\n", + "\n", " Returns:\n", " pd.DataFrame: updated data frame.\n", " \"\"\"\n", - "\n", " # set up df, overwrite later\n", " x = pd.DataFrame(data={\"TRADE_PRICE\": data[\"TRADE_PRICE\"]}, index=data.index)\n", "\n", @@ -439,7 +439,7 @@ " data[\"ROOT\"].astype(str).values.reshape(-1, 1)\n", " )\n", " print(\"transform (val + test)\")\n", - " except NotFittedError as e:\n", + " except NotFittedError:\n", " x[num_cols] = scaler.fit_transform(x[num_cols])\n", " x[\"option_type\"] = oe_option_type.fit_transform(\n", " data[\"OPTION_TYPE\"].astype(str).values.reshape(-1, 1)\n", @@ -511,7 +511,7 @@ " x[\"root\"] = data[\"ROOT\"]\n", "\n", " x[\"buy_sell\"] = data[\"buy_sell\"].astype(\"int8\")\n", - " return x\n" + " return x" ] }, { @@ -566,7 +566,7 @@ " gc.collect()\n", " dataset.add_reference(output_path)\n", "\n", - "run.log_artifact(dataset)\n" + "run.log_artifact(dataset)" ] }, { @@ -580,7 +580,6 @@ "# save scaler to pickle\n", "\n", "if strategy == \"supervised\":\n", - "\n", " scalers = {\n", " 
\"scaler\": scaler,\n", " \"oe_option_type\": oe_option_type,\n", @@ -589,12 +588,12 @@ " }\n", " uri_scalers = f\"gs://thesis-bucket-option-trade-classification/data/preprocessed/{name}/scalers.sklearn\"\n", " with fs.open(uri_scalers, \"wb\") as f:\n", - " pickle.dump(scalers, f, protocol=4) \n", + " pickle.dump(scalers, f, protocol=4)\n", "\n", " # log scaler to wandb\n", " scaler = wandb.Artifact(name=f\"{name}_scaler\", type=\"scaler\")\n", " scaler.add_reference(uri_scalers)\n", - " run.log_artifact(scaler)\n" + " run.log_artifact(scaler)" ] }, { @@ -605,7 +604,7 @@ }, "outputs": [], "source": [ - "run.finish()\n" + "run.finish()" ] }, { @@ -656,7 +655,7 @@ " *features_classical,\n", " *features_size,\n", " \"buy_sell\", # add here and remove later\n", - "]\n" + "]" ] }, { @@ -674,7 +673,7 @@ " \"gs://thesis-bucket-option-trade-classification/data/ise_log_standardized/val_set_20.parquet\",\n", " engine=\"fastparquet\",\n", " columns=features_classical_size,\n", - ")\n" + ")" ] }, { @@ -689,7 +688,7 @@ "X = pd.concat([train, val])\n", "X.drop(columns=[\"buy_sell\"], inplace=True)\n", "# assign zeros to train set and ones to test set\n", - "y = [0] * len(train) + [1] * len(val)\n" + "y = [0] * len(train) + [1] * len(val)" ] }, { @@ -698,7 +697,7 @@ "metadata": {}, "outputs": [], "source": [ - "X.columns\n" + "X.columns" ] }, { @@ -715,7 +714,7 @@ " logging_level=\"Silent\",\n", " random_seed=42,\n", " eval_metric=\"Accuracy\",\n", - ")\n" + ")" ] }, { @@ -727,7 +726,7 @@ "X_train, X_test, y_train, y_test = train_test_split(\n", " X, y, test_size=0.2, random_state=42, shuffle=True\n", ")\n", - "clf.fit(X_train, y_train, eval_set=(X_test, y_test))\n" + "clf.fit(X_train, y_train, eval_set=(X_test, y_test))" ] }, { @@ -736,7 +735,7 @@ "metadata": {}, "outputs": [], "source": [ - "y_pred = clf.predict(X_test)\n" + "y_pred = clf.predict(X_test)" ] }, { @@ -746,7 +745,7 @@ "outputs": [], "source": [ "# use mcc as data is imbalanced 3/4 train set, 1/4 val set\n", - "print(matthews_corrcoef(y_test, y_pred))\n" + "print(matthews_corrcoef(y_test, y_pred))" ] }, { @@ -758,7 +757,7 @@ "feature_importance = clf.get_feature_importance(\n", " prettified=True, type=\"FeatureImportance\"\n", ")\n", - "feature_importance\n" + "feature_importance" ] }, { @@ -767,7 +766,7 @@ "metadata": {}, "outputs": [], "source": [ - "feature_importance.to_csv(\"feature_importance_gbm_classical_size.csv\")\n" + "feature_importance.to_csv(\"feature_importance_gbm_classical_size.csv\")" ] }, { @@ -795,7 +794,7 @@ " results.append({\"col\": col, \"static\": res.statistic, \"pvalue\": res.pvalue})\n", "\n", "results = pd.DataFrame(results)\n", - "results.to_csv(\"kolmogorov_smirnov.csv\")\n" + "results.to_csv(\"kolmogorov_smirnov.csv\")" ] }, { @@ -824,10 +823,11 @@ }, "outputs": [], "source": [ - "from otc.features.build_features import features_classical_size\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", - "from matplotlib import rc" + "from matplotlib import rc\n", + "\n", + "from otc.features.build_features import features_classical_size" ] }, { @@ -871,15 +871,30 @@ }, "outputs": [], "source": [ - "cols_clearname = [\"trade price\", \"bid (ex)\", \"ask (ex)\", \"ask (best)\",\n", - " \"bid (best)\", \"price lag (ex)\", \"price lead (ex)\",\n", - " \"price lag (all)\", \"price lead (all)\",\n", - " \"price chg. lead (ex)\", \"price chg. lag (ex)\",\n", - " \"price chg. lead (all)\", \"price chg. lag (all)\",\n", - " \"prox (ex)\", \"prox (best)\", \"bid ask size ratio (ex)\",\n", - " \"rel. 
bid size (ex)\", \"rel. ask size (ex)\", \"trade size\",\n", - " \"bid size (ex)\", \"ask size (ex)\", \"depth (ex)\"\n", - " ]" + "cols_clearname = [\n", + " \"trade price\",\n", + " \"bid (ex)\",\n", + " \"ask (ex)\",\n", + " \"ask (best)\",\n", + " \"bid (best)\",\n", + " \"price lag (ex)\",\n", + " \"price lead (ex)\",\n", + " \"price lag (all)\",\n", + " \"price lead (all)\",\n", + " \"price chg. lead (ex)\",\n", + " \"price chg. lag (ex)\",\n", + " \"price chg. lead (all)\",\n", + " \"price chg. lag (all)\",\n", + " \"prox (ex)\",\n", + " \"prox (best)\",\n", + " \"bid ask size ratio (ex)\",\n", + " \"rel. bid size (ex)\",\n", + " \"rel. ask size (ex)\",\n", + " \"trade size\",\n", + " \"bid size (ex)\",\n", + " \"ask size (ex)\",\n", + " \"depth (ex)\",\n", + "]" ] }, { @@ -924,12 +939,11 @@ "plt.rcParams.update(params)\n", "rc(\"text\", usetex=True)\n", "\n", - "plt.rc('text.latex', preamble=r'\\usepackage{amsmath}\\usepackage[utf8]{inputenc}')\n", + "plt.rc(\"text.latex\", preamble=r\"\\usepackage{amsmath}\\usepackage[utf8]{inputenc}\")\n", "\n", "cmap = mpl.colormaps.get_cmap(\"plasma\")\n", "\n", "\n", - "\n", "# https://ranocha.de/blog/colors/\n", "# Standard SciencePlots color cycle\n", "mpl.rcParams[\"axes.prop_cycle\"] = mpl.cycler(\n", @@ -938,11 +952,35 @@ "\n", "# line cyclers adapted to colourblind people\n", "from cycler import cycler\n", - "line_cycler = (cycler(color=[\"#E69F00\", \"#56B4E9\", \"#009E73\", \"#0072B2\", \"#D55E00\", \"#CC79A7\", \"#F0E442\"]) # + cycler(linestyle=[\"-\", \"--\", \"-.\", \":\", \"-\", \"--\", \"-.\"])\n", - " )\n", - "marker_cycler = (cycler(color=[\"#E69F00\", \"#56B4E9\", \"#009E73\", \"#0072B2\", \"#D55E00\", \"#CC79A7\", \"#F0E442\"]) +\n", - " cycler(linestyle=[\"none\", \"none\", \"none\", \"none\", \"none\", \"none\", \"none\"]) +\n", - " cycler(marker=[\"4\", \"2\", \"3\", \"1\", \"+\", \"x\", \".\"]))\n", + "\n", + "line_cycler = (\n", + " cycler(\n", + " color=[\n", + " \"#E69F00\",\n", + " \"#56B4E9\",\n", + " \"#009E73\",\n", + " \"#0072B2\",\n", + " \"#D55E00\",\n", + " \"#CC79A7\",\n", + " \"#F0E442\",\n", + " ]\n", + " ) # + cycler(linestyle=[\"-\", \"--\", \"-.\", \":\", \"-\", \"--\", \"-.\"])\n", + ")\n", + "marker_cycler = (\n", + " cycler(\n", + " color=[\n", + " \"#E69F00\",\n", + " \"#56B4E9\",\n", + " \"#009E73\",\n", + " \"#0072B2\",\n", + " \"#D55E00\",\n", + " \"#CC79A7\",\n", + " \"#F0E442\",\n", + " ]\n", + " )\n", + " + cycler(linestyle=[\"none\", \"none\", \"none\", \"none\", \"none\", \"none\", \"none\"])\n", + " + cycler(marker=[\"4\", \"2\", \"3\", \"1\", \"+\", \"x\", \".\"])\n", + ")\n", "\n", "plt.rc(\"axes\", prop_cycle=line_cycler)" ] @@ -971,25 +1009,22 @@ "index = 0\n", "\n", "for i, col in tqdm(enumerate(cols)):\n", - "\n", - " \n", " r = i // 4\n", " c = i % 4\n", "\n", - " \n", " ax[r][c].acorr(X[col].astype(float), usevlines=True, normed=True, maxlags=20, lw=1)\n", " ax[r][c].set_title(cols_clearname[index])\n", "\n", - " index +=1\n", + " index += 1\n", "\n", "# remove empty plots\n", "fig.delaxes(ax[5][2])\n", "fig.delaxes(ax[5][3])\n", "\n", "plt.savefig(\n", - " f\"../reports/Graphs/auto_corr_features.pdf\",\n", + " \"../reports/Graphs/auto_corr_features.pdf\",\n", " bbox_inches=\"tight\",\n", - ")\n" + ")" ] } ], diff --git a/notebooks/3.0d-mb-adv_val.ipynb b/notebooks/3.0d-mb-adv_val.ipynb index d9bdf0c8..cbd3abd0 100644 --- a/notebooks/3.0d-mb-adv_val.ipynb +++ b/notebooks/3.0d-mb-adv_val.ipynb @@ -23,10 +23,9 @@ "import pandas as pd\n", "import wandb\n", "from catboost import 
CatBoostClassifier, Pool\n", - "from tqdm.auto import tqdm\n", - "\n", "from sklearn.metrics import matthews_corrcoef\n", "from sklearn.model_selection import train_test_split\n", + "from tqdm.auto import tqdm\n", "\n", "sys.path.append(\"..\")\n", "from otc.features.build_features import (\n", @@ -34,7 +33,7 @@ " features_classical,\n", " features_classical_size,\n", " features_ml,\n", - ")\n" + ")" ] }, { @@ -48,7 +47,7 @@ "STRATEGY = \"supervised\" # \"supervised\" # \"transfer\"\n", "\n", "# ise-trained models, supervised/semisupervised\n", - "models = [\"classical\",\"classical-size\",\"ml\"]\n" + "models = [\"classical\", \"classical-size\", \"ml\"]" ] }, { @@ -70,7 +69,7 @@ "outputs": [], "source": [ "# set project name. Required to access files and artefacts\n", - "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"\n" + "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"" ] }, { @@ -90,7 +89,7 @@ "run = wandb.init(project=\"thesis\", entity=\"fbv\")\n", "\n", "artifact = run.use_artifact(dataset)\n", - "data_dir = artifact.download()\n" + "data_dir = artifact.download()" ] }, { @@ -120,7 +119,9 @@ "metadata": {}, "outputs": [], "source": [ - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=len(val), random_state=42)" + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=len(val), random_state=42\n", + ")" ] }, { @@ -149,19 +150,17 @@ "}\n", "\n", "for feature_str in tqdm(models):\n", - "\n", " fs = FEATURE_MAP.get(feature_str)\n", " # filter categorical features that are in subset and get cardinality\n", " cat_features_sub = [tup[0] for tup in features_categorical if tup[0] in fs]\n", - " \n", + "\n", " train_pool = Pool(\n", " data=X_train.loc[:, fs],\n", " label=y_train,\n", " cat_features=cat_features_sub,\n", " )\n", "\n", - " model = CatBoostClassifier(task_type = \"GPU\").fit(train_pool)\n", - "\n", + " model = CatBoostClassifier(task_type=\"GPU\").fit(train_pool)\n", "\n", " test_pool = Pool(\n", " data=X_test.loc[:, fs],\n", @@ -170,10 +169,9 @@ " )\n", "\n", " mcc = matthews_corrcoef(y_test, model.predict(test_pool))\n", - " \n", + "\n", " print(feature_str)\n", - " print(mcc)\n", - " " + " print(mcc)" ] }, { diff --git a/notebooks/4.0a-mb-logistic-regression.ipynb b/notebooks/4.0a-mb-logistic-regression.ipynb index 105eecfc..789bd1f8 100644 --- a/notebooks/4.0a-mb-logistic-regression.ipynb +++ b/notebooks/4.0a-mb-logistic-regression.ipynb @@ -9,23 +9,21 @@ }, "outputs": [], "source": [ + "import math\n", "import os\n", "import sys\n", - "\n", - "import math\n", "from pathlib import Path\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import torch\n", "import wandb\n", - "from torch import nn\n", "from torch import nn, optim\n", "from tqdm.auto import tqdm\n", "\n", "sys.path.append(\"..\")\n", - "from otc.data.dataset import TabDataset\n", "from otc.data.dataloader import TabDataLoader\n", + "from otc.data.dataset import TabDataset\n", "from otc.features.build_features import features_classical_size\n", "from otc.optim.early_stopping import EarlyStopping" ] @@ -55,7 +53,7 @@ "\n", "dataset = \"fbv/thesis/ise_supervised_log_standardized_clipped:latest\"\n", "artifact = run.use_artifact(dataset)\n", - "data_dir = artifact.download()\n" + "data_dir = artifact.download()" ] }, { @@ -71,11 +69,15 @@ "frac = 1\n", "\n", "# sample\n", - "X_train = pd.read_parquet(Path(data_dir, \"train_set.parquet\"), engine=\"fastparquet\").sample(frac=frac)\n", + "X_train = pd.read_parquet(\n", + " Path(data_dir, 
\"train_set.parquet\"), engine=\"fastparquet\"\n", + ").sample(frac=frac)\n", "y_train = X_train[\"buy_sell\"]\n", "X_train = X_train[features_classical_size]\n", "\n", - "X_val = pd.read_parquet(Path(data_dir, \"val_set.parquet\"), engine=\"fastparquet\").sample(frac=frac)\n", + "X_val = pd.read_parquet(Path(data_dir, \"val_set.parquet\"), engine=\"fastparquet\").sample(\n", + " frac=frac\n", + ")\n", "y_val = X_val[\"buy_sell\"]\n", "X_val = X_val[features_classical_size]\n", "\n", @@ -118,7 +120,7 @@ "test_data = TabDataset(X_test, y_test)\n", "\n", "dl_params = {\n", - " \"batch_size\": 32768, \n", + " \"batch_size\": 32768,\n", " \"device\": \"cuda\",\n", " \"shuffle\": True,\n", "}\n", @@ -129,7 +131,7 @@ " training_data.x_cont,\n", " training_data.weight,\n", " training_data.y,\n", - " **dl_params\n", + " **dl_params,\n", ")\n", "val_loader = TabDataLoader(\n", " val_data.x_cat, val_data.x_cont, val_data.weight, val_data.y, **dl_params\n", @@ -151,14 +153,15 @@ "source": [ "optim_params = {\"lr\": 1e-4, \"weight_decay\": 0.00001}\n", "\n", - "clf = LogisticRegression(input_size=X_train.shape[1],num_classes=1).to(\"cuda\")\n", + "clf = LogisticRegression(input_size=X_train.shape[1], num_classes=1).to(\"cuda\")\n", "\n", "criterion = nn.BCEWithLogitsLoss()\n", "\n", - "optimizer = optim.AdamW(clf.parameters(),\n", + "optimizer = optim.AdamW(\n", + " clf.parameters(),\n", " lr=optim_params[\"lr\"],\n", " weight_decay=optim_params[\"weight_decay\"],\n", - ")\n" + ")" ] }, { @@ -181,69 +184,65 @@ "\n", "\n", "for epoch in tqdm(range(epochs)):\n", - "\n", " # perform training\n", " loss_in_epoch_train = 0\n", "\n", " batch = 0\n", - " \n", + "\n", " for x_cat, x_cont, weights, targets in train_loader:\n", - " \n", " clf.train()\n", " optimizer.zero_grad()\n", "\n", - " with torch.autocast(device_type='cuda', dtype=torch.float16):\n", + " with torch.autocast(device_type=\"cuda\", dtype=torch.float16):\n", " logits = clf(x_cat, x_cont).flatten()\n", " train_loss = criterion(logits, targets)\n", "\n", " scaler.scale(train_loss).backward()\n", " scaler.step(optimizer)\n", " scaler.update()\n", - " \n", + "\n", " # add the mini-batch training loss to epoch loss\n", " loss_in_epoch_train += train_loss # .item()\n", " wandb.log({\"train_loss_step\": train_loss, \"epoch\": epoch, \"batch\": batch})\n", - " \n", + "\n", " batch += 1\n", - " step +=1\n", + " step += 1\n", "\n", " clf.eval()\n", " loss_in_epoch_val = 0.0\n", " correct = 0\n", - " \n", + "\n", " with torch.no_grad():\n", " for x_cat, x_cont, weights, targets in val_loader:\n", - " \n", " # for my implementation\n", " logits = clf(x_cat, x_cont).flatten()\n", " logits = logits.flatten()\n", "\n", " val_loss = criterion(logits, targets)\n", - " \n", + "\n", " # get probabilities and round to nearest integer\n", " preds = torch.sigmoid(logits).round()\n", " correct += (preds == targets).sum().item()\n", "\n", " loss_in_epoch_val += val_loss # val_loss #.item()\n", " wandb.log({\"val_loss_step\": val_loss, \"epoch\": epoch, \"batch\": batch})\n", - " \n", - " batch +=1 \n", + "\n", + " batch += 1\n", "\n", " # loss average over all batches\n", " train_loss = loss_in_epoch_train / len(train_loader)\n", " val_loss = loss_in_epoch_val / len(val_loader)\n", - " \n", + "\n", " # correct samples / no samples\n", " val_accuracy = correct / len(X_val)\n", " if best_accuracy < val_accuracy:\n", " best_accuracy = val_accuracy\n", " best_step = step\n", - " \n", - " \n", - " wandb.log({\"train_loss\": train_loss, 'epoch': epoch})\n", - " 
wandb.log({\"val_loss\": val_loss, 'epoch': epoch})\n", - " # wandb.log({\"val_accuracy\": val_accuracy, 'epoch': epoch}) \n", - " \n", + "\n", + " wandb.log({\"train_loss\": train_loss, \"epoch\": epoch})\n", + " wandb.log({\"val_loss\": val_loss, \"epoch\": epoch})\n", + " # wandb.log({\"val_accuracy\": val_accuracy, 'epoch': epoch})\n", + "\n", " print(f\"train:{train_loss} val:{val_loss}\")\n", " print(f\"val accuracy:{val_accuracy}\")\n", "\n", @@ -251,7 +250,7 @@ " early_stopping(-val_accuracy)\n", " if early_stopping.early_stop or math.isnan(train_loss) or math.isnan(val_loss):\n", " print(\"early stopping now.\")\n", - " break\n" + " break" ] }, { @@ -273,7 +272,7 @@ " # https://stackoverflow.com/a/66910866/5755604\n", " preds = torch.sigmoid(logits.squeeze())\n", " y_pred.append(preds.detach().cpu().numpy())\n", - " y_true.append(targets.detach().cpu().numpy()) \n", + " y_true.append(targets.detach().cpu().numpy())\n", "\n", "# round prediction to nearest int\n", "y_pred = np.rint(np.concatenate(y_pred))\n", diff --git a/notebooks/4.0b-mb-fttransformer.ipynb b/notebooks/4.0b-mb-fttransformer.ipynb index 162d7ecf..473a9f8a 100644 --- a/notebooks/4.0b-mb-fttransformer.ipynb +++ b/notebooks/4.0b-mb-fttransformer.ipynb @@ -10,17 +10,17 @@ "outputs": [], "source": [ "import glob\n", - "import os\n", "import math\n", + "import os\n", "import sys\n", "from pathlib import Path\n", "\n", "import numpy as np\n", "import pandas as pd\n", - "import wandb\n", "import torch\n", - "from torch import optim, nn\n", - "from tqdm.auto import tqdm\n" + "import wandb\n", + "from torch import nn, optim\n", + "from tqdm.auto import tqdm" ] }, { @@ -31,11 +31,11 @@ "outputs": [], "source": [ "sys.path.append(\"..\")\n", - "from otc.models.fttransformer import FeatureTokenizer, FTTransformer, Transformer\n", - "from otc.models.activation import ReGLU\n", - "from otc.data.dataset import TabDataset\n", "from otc.data.dataloader import TabDataLoader\n", + "from otc.data.dataset import TabDataset\n", "from otc.features.build_features import features_classical_size\n", + "from otc.models.activation import ReGLU\n", + "from otc.models.fttransformer import FeatureTokenizer, FTTransformer, Transformer\n", "from otc.optim.early_stopping import EarlyStopping\n", "from otc.optim.scheduler import CosineWarmupScheduler" ] @@ -65,7 +65,7 @@ "\n", "dataset = \"fbv/thesis/ise_supervised_log_standardized_clipped:latest\"\n", "artifact = run.use_artifact(dataset)\n", - "data_dir = artifact.download()\n" + "data_dir = artifact.download()" ] }, { @@ -98,7 +98,7 @@ "\n", "# eps = 0.1\n", "# y_train[np.where(y_train == 0)] = eps\n", - "# y_train[np.where(y_train == 1)] = 1.0 - eps\n" + "# y_train[np.where(y_train == 1)] = 1.0 - eps" ] }, { @@ -177,8 +177,8 @@ "optim_params = {\"lr\": 1e-4, \"weight_decay\": 0.00001}\n", "\n", "module_params = {\n", - " \"transformer\": Transformer(**transformer_kwargs), \n", - " \"feature_tokenizer\": FeatureTokenizer(**feature_tokenizer_kwargs), # noqa: E501\n", + " \"transformer\": Transformer(**transformer_kwargs),\n", + " \"feature_tokenizer\": FeatureTokenizer(**feature_tokenizer_kwargs),\n", " \"cat_features\": None,\n", " \"cat_cardinalities\": [],\n", "}\n", @@ -193,7 +193,7 @@ "# wandb.log(transformer_kwargs)\n", "# wandb.log(optim_params)\n", "# wandb.log(feature_tokenizer_kwargs)\n", - "# wandb.log(dl_params)\n" + "# wandb.log(dl_params)" ] }, { @@ -214,7 +214,7 @@ " training_data.x_cont,\n", " training_data.weight,\n", " training_data.y,\n", - " **dl_params\n", + " **dl_params,\n", 
")\n", "val_loader = TabDataLoader(\n", " val_data.x_cat, val_data.x_cont, val_data.weight, val_data.y, **dl_params\n", @@ -222,7 +222,7 @@ "\n", "test_loader = TabDataLoader(\n", " test_data.x_cat, test_data.x_cont, test_data.weight, test_data.y, **dl_params\n", - ")\n" + ")" ] }, { @@ -249,7 +249,7 @@ "\n", "scheduler = CosineWarmupScheduler(\n", " optimizer=optimizer, warmup=warmup, max_iters=max_iters\n", - ")\n" + ")" ] }, { @@ -262,7 +262,6 @@ "outputs": [], "source": [ "def checkpoint(model, filename):\n", - "\n", " # remove old files\n", " for filename in glob.glob(f\"checkpoints/{run.id}*\"):\n", " os.remove(filename)\n", @@ -273,7 +272,7 @@ "\n", " # save new file\n", " print(\"saving new checkpoints.\")\n", - " torch.save(model.state_dict(), os.path.join(dir_checkpoints, f\"{run.id}*\"))\n" + " torch.save(model.state_dict(), os.path.join(dir_checkpoints, f\"{run.id}*\"))" ] }, { @@ -300,14 +299,12 @@ "best_step = -1\n", "\n", "for epoch in tqdm(range(epochs)):\n", - "\n", " # perform training\n", " loss_in_epoch_train = 0\n", "\n", " batch = 0\n", "\n", " for x_cat, x_cont, _, targets in train_loader:\n", - "\n", " clf.train()\n", " optimizer.zero_grad()\n", "\n", @@ -324,7 +321,9 @@ "\n", " # add the mini-batch training loss to epoch loss\n", " loss_in_epoch_train += train_loss.item()\n", - " wandb.log({\"train_loss_step\": train_loss.item(), \"epoch\": epoch, \"batch\": batch})\n", + " wandb.log(\n", + " {\"train_loss_step\": train_loss.item(), \"epoch\": epoch, \"batch\": batch}\n", + " )\n", "\n", " batch += 1\n", " step += 1\n", @@ -335,7 +334,6 @@ "\n", " with torch.no_grad():\n", " for x_cat, x_cont, _, targets in val_loader:\n", - "\n", " # for my implementation\n", " logits = clf(x_cat, x_cont).flatten()\n", " logits = logits.flatten()\n", @@ -347,7 +345,9 @@ " correct += (preds == targets).sum().item()\n", "\n", " loss_in_epoch_val += val_loss.item()\n", - " wandb.log({\"val_loss_step\": val_loss.item(), \"epoch\": epoch, \"batch\": batch})\n", + " wandb.log(\n", + " {\"val_loss_step\": val_loss.item(), \"epoch\": epoch, \"batch\": batch}\n", + " )\n", "\n", " batch += 1\n", "\n", @@ -372,7 +372,7 @@ " early_stopping(-val_accuracy)\n", " if early_stopping.early_stop or math.isnan(train_loss) or math.isnan(val_loss):\n", " print(\"meh... 
early stopping\")\n", - " break\n" + " break" ] }, { @@ -385,7 +385,7 @@ "outputs": [], "source": [ "cp = glob.glob(f\"checkpoints/{run.id}*\")\n", - "print(cp)\n" + "print(cp)" ] }, { @@ -397,7 +397,7 @@ }, "outputs": [], "source": [ - "clf.load_state_dict(torch.load(cp[0]))\n" + "clf.load_state_dict(torch.load(cp[0]))" ] }, { @@ -412,7 +412,6 @@ "y_pred, y_true = [], []\n", "\n", "for x_cat, x_cont, _, targets in test_loader:\n", - "\n", " logits = clf(x_cat, x_cont).flatten()\n", " logits = logits.flatten()\n", "\n", @@ -420,14 +419,14 @@ " # https://stackoverflow.com/a/66910866/5755604\n", " preds = torch.sigmoid(logits.squeeze())\n", " y_pred.append(preds.detach().cpu().numpy())\n", - " y_true.append(targets.detach().cpu().numpy()) \n", + " y_true.append(targets.detach().cpu().numpy())\n", "\n", "# round prediction to nearest int\n", "y_pred = np.rint(np.concatenate(y_pred))\n", "y_true = np.concatenate(y_true)\n", "\n", - "acc = (y_pred == y_true).sum() / len(y_true) \n", - "print(acc)\n" + "acc = (y_pred == y_true).sum() / len(y_true)\n", + "print(acc)" ] } ], diff --git a/notebooks/4.0c-mb-feature-importances.ipynb b/notebooks/4.0c-mb-feature-importances.ipynb index a34f6032..be90b3f7 100644 --- a/notebooks/4.0c-mb-feature-importances.ipynb +++ b/notebooks/4.0c-mb-feature-importances.ipynb @@ -24,39 +24,33 @@ "outputs": [], "source": [ "import os\n", - "import sys\n", "import pickle\n", + "import sys\n", "from pathlib import Path\n", "\n", - "from catboost import CatBoostClassifier, Pool\n", - "\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", "import matplotlib as mpl\n", - "from matplotlib import rc\n", - "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", "import pandas as pd\n", "import torch\n", + "from catboost import CatBoostClassifier, Pool\n", + "from matplotlib import rc\n", "from torch import nn\n", "\n", "sys.path.append(\"..\")\n", - "from otc.models.classical_classifier import ClassicalClassifier\n", - "\n", + "import wandb\n", "from sage import GroupedMarginalImputer, PermutationEstimator\n", + "from tqdm.auto import tqdm\n", "\n", + "from otc.data.dataloader import TabDataLoader\n", + "from otc.data.dataset import TabDataset\n", "from otc.features.build_features import (\n", " features_categorical,\n", " features_classical,\n", " features_classical_size,\n", " features_ml,\n", ")\n", - "\n", - "from otc.data.dataset import TabDataset\n", - "from otc.data.dataloader import TabDataLoader\n", - "from otc.features.build_features import features_classical_size\n", - "\n", - "import wandb\n", - "from tqdm.auto import tqdm" + "from otc.models.classical_classifier import ClassicalClassifier" ] }, { @@ -70,12 +64,12 @@ "source": [ "SEED = 42\n", "\n", - "np.random.seed(42) \n", + "np.random.seed(42)\n", "\n", "# set globally here\n", - "EXCHANGE = \"ise\" \n", - "STRATEGY = \"supervised\" \n", - "SUBSET = \"test\" \n", + "EXCHANGE = \"ise\"\n", + "STRATEGY = \"supervised\"\n", + "SUBSET = \"test\"\n", "\n", "\n", "# Change depending on model!\n", @@ -113,36 +107,44 @@ "outputs": [], "source": [ "def get_feature_groups(feature_names, feature_str):\n", - "\n", " fg_classical = {\n", - " 'chg_all_lead (grouped)': ['price_all_lead', 'chg_all_lead'],\n", - " 'chg_all_lag (grouped)': ['price_all_lag', 'chg_all_lag'],\n", - " 'chg_ex_lead (grouped)': ['price_ex_lead', 'chg_ex_lead'],\n", - " 'chg_ex_lag (grouped)': ['price_ex_lag', 'chg_ex_lag'],\n", - " 'quote_best (grouped)': ['BEST_ASK', 'BEST_BID', 'prox_best'],\n", - " 'quote_ex 
(grouped)': ['bid_ex', 'ask_ex','prox_ex' ],\n", - " 'TRADE_PRICE': ['TRADE_PRICE'],\n", - " }\n", - " \n", - " fg_size = {'size_ex (grouped)': [ 'bid_ask_size_ratio_ex', 'rel_bid_size_ex', 'rel_ask_size_ex', 'bid_size_ex', 'ask_size_ex','depth_ex'], 'TRADE_SIZE': ['TRADE_SIZE']}\n", - " \n", + " \"chg_all_lead (grouped)\": [\"price_all_lead\", \"chg_all_lead\"],\n", + " \"chg_all_lag (grouped)\": [\"price_all_lag\", \"chg_all_lag\"],\n", + " \"chg_ex_lead (grouped)\": [\"price_ex_lead\", \"chg_ex_lead\"],\n", + " \"chg_ex_lag (grouped)\": [\"price_ex_lag\", \"chg_ex_lag\"],\n", + " \"quote_best (grouped)\": [\"BEST_ASK\", \"BEST_BID\", \"prox_best\"],\n", + " \"quote_ex (grouped)\": [\"bid_ex\", \"ask_ex\", \"prox_ex\"],\n", + " \"TRADE_PRICE\": [\"TRADE_PRICE\"],\n", + " }\n", + "\n", + " fg_size = {\n", + " \"size_ex (grouped)\": [\n", + " \"bid_ask_size_ratio_ex\",\n", + " \"rel_bid_size_ex\",\n", + " \"rel_ask_size_ex\",\n", + " \"bid_size_ex\",\n", + " \"ask_size_ex\",\n", + " \"depth_ex\",\n", + " ],\n", + " \"TRADE_SIZE\": [\"TRADE_SIZE\"],\n", + " }\n", + "\n", " fg_ml = {\n", " \"STRK_PRC\": [\"STRK_PRC\"],\n", " \"ttm\": [\"ttm\"],\n", " \"option_type\": [\"option_type\"],\n", - " \"root\":[\"root\"],\n", - " \"myn\":[\"myn\"],\n", - " \"day_vol\":[\"day_vol\"], \n", - " \"issue_type\":[\"issue_type\"],\n", + " \"root\": [\"root\"],\n", + " \"myn\": [\"myn\"],\n", + " \"day_vol\": [\"day_vol\"],\n", + " \"issue_type\": [\"issue_type\"],\n", " }\n", - " \n", + "\n", " if feature_str.endswith(\"classical\"):\n", - " feature_groups = group_names = fg_classical \n", + " feature_groups = group_names = fg_classical\n", " if feature_str.endswith(\"classical-size\"):\n", - " feature_groups = group_names = {**fg_classical , **fg_size}\n", + " feature_groups = group_names = {**fg_classical, **fg_size}\n", " if feature_str.endswith(\"ml\"):\n", - " feature_groups = group_names = {**fg_classical, **fg_size, **fg_ml} \n", - " \n", + " feature_groups = group_names = {**fg_classical, **fg_size, **fg_ml}\n", "\n", " # Group indices\n", " groups = []\n", @@ -152,7 +154,7 @@ " ind_list.append(feature_names.index(feature))\n", " groups.append(ind_list)\n", "\n", - " return groups, group_names\n" + " return groups, group_names" ] }, { @@ -172,7 +174,11 @@ "artifact = run.use_artifact(dataset)\n", "data_dir = artifact.download()\n", "\n", - "data = pd.read_parquet(Path(data_dir, \"test_set.parquet\"), engine=\"fastparquet\", columns=[*features_classical_size, \"buy_sell\"])\n", + "data = pd.read_parquet(\n", + " Path(data_dir, \"test_set.parquet\"),\n", + " engine=\"fastparquet\",\n", + " columns=[*features_classical_size, \"buy_sell\"],\n", + ")\n", "\n", "y_test = data[\"buy_sell\"]\n", "X_test = data.drop(columns=\"buy_sell\")\n", @@ -223,20 +229,27 @@ "# compare benchmarks\n", "configs = [\n", " [(\"quote\", \"best\"), (\"quote\", \"ex\"), (\"rev_tick\", \"all\")],\n", - " [(\"trade_size\", \"ex\"), (\"quote\", \"best\"), (\"quote\", \"ex\"), (\"depth\", \"best\"), (\"depth\", \"ex\"), (\"rev_tick\", \"all\")] \n", + " [\n", + " (\"trade_size\", \"ex\"),\n", + " (\"quote\", \"best\"),\n", + " (\"quote\", \"ex\"),\n", + " (\"depth\", \"best\"),\n", + " (\"depth\", \"ex\"),\n", + " (\"rev_tick\", \"all\"),\n", + " ],\n", "]\n", "\n", "results = []\n", "for config in configs:\n", - " \n", - " groups, group_names = get_feature_groups(X_importance.columns.tolist(), \"classical-size\")\n", - " \n", + " groups, group_names = get_feature_groups(\n", + " X_importance.columns.tolist(), 
\"classical-size\"\n", + " )\n", + "\n", " clf = ClassicalClassifier(layers=config, random_state=SEED, strategy=\"random\")\n", " # only set headers etc, no leakage\n", " clf.fit(X=X_test.head(5), y=y_test.head(5))\n", - " \n", + "\n", " def call_classical(X):\n", - " \n", " pred = clf.predict_proba(X)\n", " # max_class = np.argmax(pred, axis=-1)\n", " # return max_class\n", @@ -245,12 +258,14 @@ " # apply group based imputation + estimate importances in terms of zero-one loss\n", " imputer = GroupedMarginalImputer(call_classical, X_importance.values, groups)\n", " estimator = PermutationEstimator(imputer, \"zero one\")\n", - " \n", + "\n", " # calculate values over entire test set\n", " sage_values = estimator(X_test.values, y_test.values.clip(0))\n", - " \n", + "\n", " # save sage values + std deviation to data frame\n", - " result = pd.DataFrame(index=group_names, data={\"values\": sage_values.values, \"std\": sage_values.std})\n", + " result = pd.DataFrame(\n", + " index=group_names, data={\"values\": sage_values.values, \"std\": sage_values.std}\n", + " )\n", " results.append(result)" ] }, @@ -274,7 +289,7 @@ "results_df = pd.concat(results, axis=1, keys=names)\n", "\n", "# flatten column names (required to save to parquet)\n", - "results_df.columns = [' '.join(col).strip() for col in results_df.columns.values]" + "results_df.columns = [\" \".join(col).strip() for col in results_df.columns.values]" ] }, { @@ -298,7 +313,9 @@ "source": [ "KEY = f\"{EXCHANGE}_{STRATEGY}_{SUBSET}_classical_feature_importance_{sample_size}\"\n", "\n", - "URI_FI_CLASSICAL = f\"gs://thesis-bucket-option-trade-classification/data/results/{KEY}.parquet\"\n", + "URI_FI_CLASSICAL = (\n", + " f\"gs://thesis-bucket-option-trade-classification/data/results/{KEY}.parquet\"\n", + ")\n", "\n", "results_df.to_parquet(URI_FI_CLASSICAL)\n", "\n", @@ -340,7 +357,11 @@ "artifact = run.use_artifact(dataset)\n", "data_dir = artifact.download()\n", "\n", - "data = pd.read_parquet(Path(data_dir, \"test_set.parquet\"), engine=\"fastparquet\", columns=[*features_ml, \"buy_sell\"])\n", + "data = pd.read_parquet(\n", + " Path(data_dir, \"test_set.parquet\"),\n", + " engine=\"fastparquet\",\n", + " columns=[*features_ml, \"buy_sell\"],\n", + ")\n", "\n", "y_test = data[\"buy_sell\"]\n", "X_test = data.drop(columns=\"buy_sell\")\n", @@ -372,64 +393,64 @@ }, "outputs": [], "source": [ - "configs = [(\"classical\", \"1gzk7msy_CatBoostClassifier_default.cbm:latest\"),\n", + "configs = [\n", + " (\"classical\", \"1gzk7msy_CatBoostClassifier_default.cbm:latest\"),\n", " (\"classical-size\", \"3vntumoi_CatBoostClassifier_default.cbm:latest\"),\n", " (\"ml\", \"2t5zo50f_CatBoostClassifier_default.cbm:latest\"),\n", " (\"semi-classical\", \"37lymmzc_CatBoostClassifier_default.cbm:latest\"),\n", " (\"semi-classical-size\", \"1vmti6db_CatBoostClassifier_default.cbm:latest\"),\n", - " (\"semi-ml\", \"t55nd8r0_CatBoostClassifier_default.cbm:latest\")]\n", + " (\"semi-ml\", \"t55nd8r0_CatBoostClassifier_default.cbm:latest\"),\n", + "]\n", "\n", "results = []\n", "\n", "for feature_str, model in configs:\n", - " \n", " # get feature names and slice to subset\n", " fs = FEATURE_MAP.get(feature_str)\n", " X_importance_fs = X_importance.loc[:, fs]\n", " X_importance_cols = X_importance_fs.columns.tolist()\n", - " \n", + "\n", " # calculate cat indices\n", " if feature_str.endswith(\"ml\"):\n", " cat_features = [t[0] for t in features_categorical]\n", " cat_idx = [X_importance_cols.index(f) for f in cat_features]\n", - " \n", + "\n", " # get 
groups\n", " groups, group_names = get_feature_groups(X_importance_cols, feature_str)\n", - " \n", + "\n", " # load model by identifier from wandb\n", " model_name = model.split(\"/\")[-1].split(\":\")[0]\n", - " \n", + "\n", " artifact = run.use_artifact(model)\n", " model_dir = artifact.download()\n", " clf = CatBoostClassifier()\n", " clf.load_model(fname=Path(model_dir, model_name))\n", - " \n", - " \n", + "\n", " # use callable instead of default catboost as it doesn't work with categoricals otherwise\n", - " pred=None\n", - " \n", + " pred = None\n", + "\n", " def call_catboost(X):\n", - " if feature_str.endswith(\"ml\"): \n", + " if feature_str.endswith(\"ml\"):\n", " # convert categorical to int\n", " X = pd.DataFrame(X, columns=X_importance.columns)\n", " # Update the selected columns in the original DataFrame\n", " X[cat_features] = X.iloc[:, cat_idx].astype(int)\n", " # pass cat indices\n", " return clf.predict_proba(Pool(X, cat_features=cat_idx))\n", - " else:\n", - " return clf.predict_proba(X)\n", - " \n", - " \n", + " return clf.predict_proba(X)\n", + "\n", " # apply group based imputation + estimate importances in terms of zero-one loss\n", " imputer = GroupedMarginalImputer(call_catboost, X_importance_fs, groups)\n", " # imputer = MarginalImputer(call_catboost, X_importance_fs)\n", " estimator = PermutationEstimator(imputer, \"zero one\")\n", - " \n", + "\n", " # calculate values over entire test set\n", - " sage_values = estimator(X_test.loc[:,fs].values, y_test.clip(0).values)\n", - " \n", + " sage_values = estimator(X_test.loc[:, fs].values, y_test.clip(0).values)\n", + "\n", " # save sage values + std deviation to data frame\n", - " result = pd.DataFrame(index=group_names, data={\"values\": sage_values.values, \"std\": sage_values.std})\n", + " result = pd.DataFrame(\n", + " index=group_names, data={\"values\": sage_values.values, \"std\": sage_values.std}\n", + " )\n", " # result = pd.DataFrame(index=X_importance_cols, data={\"values\": sage_values.values, \"std\": sage_values.std})\n", " results.append(result)" ] @@ -445,7 +466,7 @@ "source": [ "names = [f\"gbm({feature_str[0]})\" for feature_str in configs]\n", "results_df = pd.concat(results, axis=1, keys=names)\n", - "results_df.columns = [' '.join(col).strip() for col in results_df.columns.values]" + "results_df.columns = [\" \".join(col).strip() for col in results_df.columns.values]" ] }, { @@ -472,12 +493,14 @@ "# list to data frame + set human readable names\n", "names = [f\"gbm({feature_str[0]})\" for feature_str in configs]\n", "results_df = pd.concat(results, axis=1, keys=names)\n", - "results_df.columns = [' '.join(col).strip() for col in results_df.columns.values]\n", + "results_df.columns = [\" \".join(col).strip() for col in results_df.columns.values]\n", "\n", "# save to google clound and save identiifer\n", "KEY = f\"{EXCHANGE}_{STRATEGY}_{SUBSET}_gbm_feature_importance_{sample_size}\"\n", "\n", - "URI_FI_GBM = f\"gs://thesis-bucket-option-trade-classification/data/results/{KEY}.parquet\"\n", + "URI_FI_GBM = (\n", + " f\"gs://thesis-bucket-option-trade-classification/data/results/{KEY}.parquet\"\n", + ")\n", "\n", "results_df.to_parquet(URI_FI_GBM)\n", "\n", @@ -514,10 +537,10 @@ "source": [ "configs = [\n", " (\"classical\", \"3jpe46s1_TransformerClassifier_default.pkl:latest\"),\n", - " (\"classical-size\", \"1qx3ul4j_TransformerClassifier_default.pkl:latest\"), \n", + " (\"classical-size\", \"1qx3ul4j_TransformerClassifier_default.pkl:latest\"),\n", " (\"ml\", 
\"2h81aiow_TransformerClassifier_default.pkl:latest\"),\n", " (\"semi-classical\", \"12isqh2m_TransformerClassifier_default.pkl:latest\"),\n", - " (\"semi-classical-size\", \"2hv1nayy_TransformerClassifier_default.pkl:latest\"), \n", + " (\"semi-classical-size\", \"2hv1nayy_TransformerClassifier_default.pkl:latest\"),\n", " (\"semi-ml\", \"3jbqpp4r_TransformerClassifier_default.pkl:latest\"),\n", "]\n", "\n", @@ -531,32 +554,34 @@ " fs = FEATURE_MAP.get(feature_str)\n", " X_importance_fs = X_importance.loc[:, fs]\n", " X_importance_cols = X_importance_fs.columns.tolist()\n", - " \n", + "\n", " # calculate cat indices\n", " if feature_str.endswith(\"ml\"):\n", " cat_features = [t[0] for t in features_categorical]\n", " cat_idx = [X_importance_cols.index(f) for f in cat_features]\n", - " \n", + "\n", " # get groups\n", " groups, group_names = get_feature_groups(X_importance_cols, feature_str)\n", - " \n", + "\n", " model_name = model.split(\"/\")[-1].split(\":\")[0]\n", "\n", " artifact = run.use_artifact(model)\n", " model_dir = artifact.download()\n", "\n", - " with open(Path(model_dir, model_name), 'rb') as f:\n", + " with open(Path(model_dir, model_name), \"rb\") as f:\n", " clf = pickle.load(f)\n", - " \n", + "\n", " # apply group based imputation + estimate importances in terms of zero-one loss\n", " imputer = GroupedMarginalImputer(clf, X_importance_fs, groups)\n", " estimator = PermutationEstimator(imputer, \"zero one\")\n", - " \n", + "\n", " # calculate values over entire test set\n", - " sage_values = estimator(X_test.loc[:,fs].values, y_test.clip(0).values)\n", - " \n", + " sage_values = estimator(X_test.loc[:, fs].values, y_test.clip(0).values)\n", + "\n", " # save sage values + std deviation to data frame\n", - " result = pd.DataFrame(index=group_names, data={\"values\": sage_values.values, \"std\": sage_values.std})\n", + " result = pd.DataFrame(\n", + " index=group_names, data={\"values\": sage_values.values, \"std\": sage_values.std}\n", + " )\n", " results.append(result)" ] }, @@ -572,12 +597,14 @@ "# list to data frame + set human readable names\n", "names = [f\"fttransformer({feature_str[0]})\" for feature_str in configs]\n", "results_df = pd.concat(results, axis=1, keys=names)\n", - "results_df.columns = [' '.join(col).strip() for col in results_df.columns.values]\n", + "results_df.columns = [\" \".join(col).strip() for col in results_df.columns.values]\n", "\n", "# save to google clound and save identiifer\n", "KEY = f\"{EXCHANGE}_{STRATEGY}_{SUBSET}_fttransformer_feature_importance_{sample_size}\"\n", "\n", - "URI_FI_FTTRANSFORMER = f\"gs://thesis-bucket-option-trade-classification/data/results/{KEY}.parquet\"\n", + "URI_FI_FTTRANSFORMER = (\n", + " f\"gs://thesis-bucket-option-trade-classification/data/results/{KEY}.parquet\"\n", + ")\n", "\n", "results_df.to_parquet(URI_FI_FTTRANSFORMER)\n", "\n", @@ -627,7 +654,7 @@ "plt.rcParams.update(params)\n", "rc(\"text\", usetex=True)\n", "\n", - "plt.rc('text.latex', preamble=r'\\usepackage{amsmath}\\usepackage[utf8]{inputenc}')\n", + "plt.rc(\"text.latex\", preamble=r\"\\usepackage{amsmath}\\usepackage[utf8]{inputenc}\")\n", "\n", "CM = 1 / 2.54\n", "\n", @@ -651,10 +678,10 @@ "\n", "artifact = run.use_artifact(MODEL)\n", "model_dir = artifact.download()\n", - " \n", - "with open(Path(model_dir, model_name), 'rb') as f:\n", + "\n", + "with open(Path(model_dir, model_name), \"rb\") as f:\n", " model = pickle.load(f)\n", - " \n", + "\n", "clf = model.clf" ] }, @@ -672,7 +699,11 @@ "artifact = run.use_artifact(dataset)\n", 
"data_dir = artifact.download()\n", "\n", - "data = pd.read_parquet(Path(data_dir, \"test_set.parquet\"), engine=\"fastparquet\", columns=[*features_ml, \"buy_sell\"])\n", + "data = pd.read_parquet(\n", + " Path(data_dir, \"test_set.parquet\"),\n", + " engine=\"fastparquet\",\n", + " columns=[*features_ml, \"buy_sell\"],\n", + ")\n", "\n", "y_test = data[\"buy_sell\"]\n", "X_test = data.drop(columns=\"buy_sell\")" @@ -716,17 +747,72 @@ "\n", "\n", "# at mid\n", - "idx = [39342276, 39342363, 39342387, 39342437, 39342436, 39342428,\n", - " 39342464, 39342540, 39342608, 39342598, 39342620, 39342632,\n", - " 39342674, 39342781, 39342804, 39342824, 39342818, 39342821,\n", - " 39342861, 39342871, 39342894, 39342898, 39342931, 39342934,\n", - " 39342948, 39342954, 39342960, 39342969, 39342986, 39342987,\n", - " 39342991, 39342992, 39343036, 39343082, 39343100, 39343098,\n", - " 39343099, 39343101, 39343102, 39343109, 39343112, 39343124,\n", - " 39343128, 39343165, 39343193, 39343199, 39343211, 39343215,\n", - " 39343234, 39343242, 39343298, 39343346, 39343370, 39343390,\n", - " 39343412, 39343413, 39343415, 39343414, 39343426, 39343433,\n", - " 39343465, 39343464, 39343485, 39343498]" + "idx = [\n", + " 39342276,\n", + " 39342363,\n", + " 39342387,\n", + " 39342437,\n", + " 39342436,\n", + " 39342428,\n", + " 39342464,\n", + " 39342540,\n", + " 39342608,\n", + " 39342598,\n", + " 39342620,\n", + " 39342632,\n", + " 39342674,\n", + " 39342781,\n", + " 39342804,\n", + " 39342824,\n", + " 39342818,\n", + " 39342821,\n", + " 39342861,\n", + " 39342871,\n", + " 39342894,\n", + " 39342898,\n", + " 39342931,\n", + " 39342934,\n", + " 39342948,\n", + " 39342954,\n", + " 39342960,\n", + " 39342969,\n", + " 39342986,\n", + " 39342987,\n", + " 39342991,\n", + " 39342992,\n", + " 39343036,\n", + " 39343082,\n", + " 39343100,\n", + " 39343098,\n", + " 39343099,\n", + " 39343101,\n", + " 39343102,\n", + " 39343109,\n", + " 39343112,\n", + " 39343124,\n", + " 39343128,\n", + " 39343165,\n", + " 39343193,\n", + " 39343199,\n", + " 39343211,\n", + " 39343215,\n", + " 39343234,\n", + " 39343242,\n", + " 39343298,\n", + " 39343346,\n", + " 39343370,\n", + " 39343390,\n", + " 39343412,\n", + " 39343413,\n", + " 39343415,\n", + " 39343414,\n", + " 39343426,\n", + " 39343433,\n", + " 39343465,\n", + " 39343464,\n", + " 39343485,\n", + " 39343498,\n", + "]" ] }, { @@ -746,22 +832,22 @@ "cat_unique_counts = model.module_params[\"cat_cardinalities\"]\n", "\n", "dl_params = {\n", - " \"batch_size\": batch_size, \n", + " \"batch_size\": batch_size,\n", " \"shuffle\": False,\n", " \"device\": device,\n", "}\n", "\n", - "test_data = TabDataset(X_test[X_test.index.isin(idx)], y_test[y_test.index.isin(idx)], cat_features=cat_features, cat_unique_counts=cat_unique_counts)\n", + "test_data = TabDataset(\n", + " X_test[X_test.index.isin(idx)],\n", + " y_test[y_test.index.isin(idx)],\n", + " cat_features=cat_features,\n", + " cat_unique_counts=cat_unique_counts,\n", + ")\n", "\n", "\n", "test_loader = TabDataLoader(\n", - " test_data.x_cat,\n", - " test_data.x_cont,\n", - " test_data.weight,\n", - " test_data.y,\n", - " **dl_params\n", - ")\n", - "\n" + " test_data.x_cat, test_data.x_cont, test_data.weight, test_data.y, **dl_params\n", + ")" ] }, { @@ -824,20 +910,19 @@ "grads = []\n", "\n", "for i, block in enumerate(clf.transformer.blocks):\n", - "\n", " grad = block.attention.get_attn_gradients().detach()\n", " cam = block.attention.get_attn().detach()\n", - " \n", + "\n", " cams.append(cam)\n", " grads.append(grad)\n", - " 
\n", + "\n", " # reshape to [batch_size x num_head, num_tokens, num_tokens]\n", " cam = cam.reshape(-1, cam.shape[-1], cam.shape[-1])\n", " grad = grad.reshape(-1, grad.shape[-1], grad.shape[-1])\n", - " \n", + "\n", " # dot product\n", " cam = grad * cam\n", - " \n", + "\n", " # reshape to [batch_size, num_head, num_tokens, num_tokens]\n", " cam = cam.reshape(batch_size, -1, cam.shape[-1], cam.shape[-1])\n", " # clamp negative values, calculate mean over heads\n", @@ -874,10 +959,10 @@ "max_stack = 16\n", "\n", "for i in range(max_stack):\n", - " row = batch_probs[-i][0,1:]\n", + " row = batch_probs[-i][0, 1:]\n", " # row = test[np.newaxis,...]\n", " stack.append(row)\n", - " \n", + "\n", "stack_np = np.vstack(stack)" ] }, @@ -904,35 +989,37 @@ }, "outputs": [], "source": [ - "labels_sanitized = ['trade price',\n", - " 'bid (ex)',\n", - " 'ask (ex)',\n", - " 'ask (best)',\n", - " 'bid (best)',\n", - " 'price lag (ex)',\n", - " 'price lead (ex)',\n", - " 'price lag (all)',\n", - " 'price lead (all)',\n", - " 'chg lead (ex)',\n", - " 'chg lag (ex)',\n", - " 'chg lead (all)',\n", - " 'chg lag (all)',\n", - " 'prox (ex)',\n", - " 'prox (best)',\n", - " 'bid ask size ratio (ex)',\n", - " 'rel. bid size (ex)',\n", - " 'rel. ask size (ex)',\n", - " 'trade size',\n", - " 'bid size (ex)',\n", - " 'ask size (ex)',\n", - " 'depth (ex)',\n", - " 'strike price',\n", - " 'time to maturity',\n", - " 'moneyness',\n", - " 'day volume',\n", - " 'option type',\n", - " 'issue type',\n", - " 'root']" + "labels_sanitized = [\n", + " \"trade price\",\n", + " \"bid (ex)\",\n", + " \"ask (ex)\",\n", + " \"ask (best)\",\n", + " \"bid (best)\",\n", + " \"price lag (ex)\",\n", + " \"price lead (ex)\",\n", + " \"price lag (all)\",\n", + " \"price lead (all)\",\n", + " \"chg lead (ex)\",\n", + " \"chg lag (ex)\",\n", + " \"chg lead (all)\",\n", + " \"chg lag (all)\",\n", + " \"prox (ex)\",\n", + " \"prox (best)\",\n", + " \"bid ask size ratio (ex)\",\n", + " \"rel. bid size (ex)\",\n", + " \"rel. 
ask size (ex)\",\n", + " \"trade size\",\n", + " \"bid size (ex)\",\n", + " \"ask size (ex)\",\n", + " \"depth (ex)\",\n", + " \"strike price\",\n", + " \"time to maturity\",\n", + " \"moneyness\",\n", + " \"day volume\",\n", + " \"option type\",\n", + " \"issue type\",\n", + " \"root\",\n", + "]" ] }, { @@ -956,12 +1043,12 @@ }, "outputs": [], "source": [ - "fig, ax = plt.subplots(1, 2, figsize=(14*CM,10*CM), sharey=True)\n", - "ax[0].imshow(stack_np.T, cmap='Blues', interpolation='nearest')\n", + "fig, ax = plt.subplots(1, 2, figsize=(14 * CM, 10 * CM), sharey=True)\n", + "ax[0].imshow(stack_np.T, cmap=\"Blues\", interpolation=\"nearest\")\n", "ax[0].yaxis.set_ticks(list(range(len(labels_sanitized))))\n", "ax[0].set_yticklabels(labels_sanitized)\n", "ax[0].set_xlabel(\"At Quotes\")\n", - "ax[1].imshow(stack_np_copy.T, cmap='Blues', interpolation='nearest')\n", + "ax[1].imshow(stack_np_copy.T, cmap=\"Blues\", interpolation=\"nearest\")\n", "ax[1].yaxis.set_ticks(list(range(len(labels_sanitized))))\n", "ax[1].set_yticklabels(labels_sanitized, fontsize=\"x-small\")\n", "ax[1].set_xlabel(\"At Mid\")\n", @@ -978,7 +1065,7 @@ }, "outputs": [], "source": [ - "labels_detail = [\"$\\mathtt{[CLS]}$\", *labels_sanitized]" + "labels_detail = [r\"$\\mathtt{[CLS]}$\", *labels_sanitized]" ] }, { @@ -1014,7 +1101,7 @@ }, "outputs": [], "source": [ - "labels_left = ['$\\\\mathtt{[CLS]}$', *[\"...\"]*(len(labels_detail) - 1)]" + "labels_left = [\"$\\\\mathtt{[CLS]}$\", *[\"...\"] * (len(labels_detail) - 1)]" ] }, { @@ -1052,7 +1139,7 @@ "source": [ "from matplotlib.pyplot import cm\n", "\n", - "plt.figure(figsize=(3*CM,10*CM))\n", + "plt.figure(figsize=(3 * CM, 10 * CM))\n", "\n", "\n", "yoffset = 0\n", @@ -1076,7 +1163,7 @@ "h = 0\n", "\n", "cam = cams[l].reshape(batch_size, -1, cam.shape[-1], cam.shape[-1])\n", - "attention = cam[0,h,:,:]\n", + "attention = cam[0, h, :, :]\n", "attention /= attention.sum(axis=-1, keepdims=True)\n", "\n", "\n", @@ -1084,22 +1171,40 @@ "color = iter(cm.rainbow(np.linspace(0, 1, heads * layer)))\n", "\n", "for position, word in enumerate(labels_left):\n", - " plt.text(0, yoffset - position * word_height, word,\n", - " ha=\"right\", va=\"center\", size=\"x-small\")\n", + " plt.text(\n", + " 0,\n", + " yoffset - position * word_height,\n", + " word,\n", + " ha=\"right\",\n", + " va=\"center\",\n", + " size=\"x-small\",\n", + " )\n", "for position, word in enumerate(labels_detail):\n", - " plt.text(width, yoffset - position * word_height, word,\n", - " ha=\"left\", va=\"center\", size=\"x-small\")\n", + " plt.text(\n", + " width,\n", + " yoffset - position * word_height,\n", + " word,\n", + " ha=\"left\",\n", + " va=\"center\",\n", + " size=\"x-small\",\n", + " )\n", "# focus on cls token\n", "c = next(color)\n", "# CLS is prepended, get first row, similar to chefer\n", "for i, vec in enumerate(attention[0:1]):\n", " for j, el in enumerate(vec):\n", - " plt.plot([xoffset + pad, xoffset + width - pad],\n", - " [yoffset - word_height * i, yoffset - word_height * j],\n", - " color=c, linewidth=2, alpha=el.item())\n", - "plt.axis('off')\n", + " plt.plot(\n", + " [xoffset + pad, xoffset + width - pad],\n", + " [yoffset - word_height * i, yoffset - word_height * j],\n", + " color=c,\n", + " linewidth=2,\n", + " alpha=el.item(),\n", + " )\n", + "plt.axis(\"off\")\n", "plt.tight_layout()\n", - "plt.savefig(f\"../reports/Graphs/attention_head_{h+1}_layer_{l+1}_{key}.pdf\", bbox_inches=\"tight\")" + "plt.savefig(\n", + " 
f\"../reports/Graphs/attention_head_{h+1}_layer_{l+1}_{key}.pdf\", bbox_inches=\"tight\"\n", + ")" ] }, { @@ -1113,7 +1218,7 @@ "source": [ "from matplotlib.pyplot import cm\n", "\n", - "plt.figure(figsize=(36,6))\n", + "plt.figure(figsize=(36, 6))\n", "\n", "\n", "yoffset = 0\n", @@ -1141,14 +1246,13 @@ "color = iter(cm.rainbow(np.linspace(0, 1, heads * layer)))\n", "\n", "for l in range(layer):\n", - "\n", - " for h in range (heads):\n", + " for h in range(heads):\n", " # [batch x head x attn x dim attn]\n", "\n", " cam = cams[l].reshape(batch_size, -1, cam.shape[-1], cam.shape[-1])\n", "\n", " # [first in batch, head h, :,:]\n", - " attention = cam[0,h,:,:]\n", + " attention = cam[0, h, :, :]\n", "\n", " attention /= attention.sum(axis=-1, keepdims=True)\n", "\n", @@ -1165,17 +1269,23 @@ " c = next(color)\n", " for i, vec in enumerate(attention[0:1]):\n", " for j, el in enumerate(vec):\n", - " axes[l,h].plot([pad, width - pad], # x axis\n", - " [word_height * i, word_height * j],\n", - " color=c, linewidth=2, alpha=el.item())\n", - "\n", - " axes[l,h].set_title(f\"head {l+1,h+1}\", size='xx-small')\n", - "# fig.tight_layout()\n", - " axes[l,h].set_xticks([])\n", - " axes[l,h].set_yticks([])\n", + " axes[l, h].plot(\n", + " [pad, width - pad], # x axis\n", + " [word_height * i, word_height * j],\n", + " color=c,\n", + " linewidth=2,\n", + " alpha=el.item(),\n", + " )\n", + "\n", + " axes[l, h].set_title(f\"head {l+1,h+1}\", size=\"xx-small\")\n", + " # fig.tight_layout()\n", + " axes[l, h].set_xticks([])\n", + " axes[l, h].set_yticks([])\n", " # axes[l,h].axis('off')\n", "\n", - "plt.savefig(f\"../reports/Graphs/attention_heads_layer_all_{key}.pdf\", bbox_inches=\"tight\")" + "plt.savefig(\n", + " f\"../reports/Graphs/attention_heads_layer_all_{key}.pdf\", bbox_inches=\"tight\"\n", + ")" ] }, { @@ -1185,8 +1295,7 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "data = {\"grads\":grads, \"cams\":cams, \"final-scores\":stack_np_copy}" + "data = {\"grads\": grads, \"cams\": cams, \"final-scores\": stack_np_copy}" ] }, { @@ -1199,10 +1308,10 @@ "outputs": [], "source": [ "# Specify the file path where you want to save the pickle file\n", - "file_path = 'data.pickle'\n", + "file_path = \"data.pickle\"\n", "\n", "# Open the file in binary mode and write the dictionary to it\n", - "with open(file_path, 'wb') as file:\n", + "with open(file_path, \"wb\") as file:\n", " pickle.dump(data, file)" ] } diff --git a/notebooks/4.0e-mb-fttransformer-pretraining.ipynb b/notebooks/4.0e-mb-fttransformer-pretraining.ipynb index 9f3c32ef..68d8cb8d 100644 --- a/notebooks/4.0e-mb-fttransformer-pretraining.ipynb +++ b/notebooks/4.0e-mb-fttransformer-pretraining.ipynb @@ -11,26 +11,22 @@ "from pathlib import Path\n", "\n", "import pandas as pd\n", - "\n", - "from tqdm.auto import tqdm\n", - "\n", "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", - "\n", "import wandb\n", + "from tqdm.auto import tqdm\n", "\n", + "from otc.data.dataloader import TabDataLoader\n", + "from otc.data.dataset import TabDataset\n", + "from otc.features.build_features import features_classical_size\n", "from otc.models.activation import ReGLU\n", "from otc.models.fttransformer import (\n", + " CLSHead,\n", " FeatureTokenizer,\n", " FTTransformer,\n", " Transformer,\n", - " CLSHead,\n", ")\n", - "\n", - "from otc.data.dataset import TabDataset\n", - "from otc.data.dataloader import TabDataLoader\n", - "from otc.features.build_features import features_classical_size\n", "from otc.optim.early_stopping 
import EarlyStopping\n", "from otc.optim.scheduler import CosineWarmupScheduler" ] @@ -64,12 +60,12 @@ "outputs": [], "source": [ "# preserve relative ordering, sample for testing ache\n", - "frac = 1 #0.05\n", + "frac = 1 # 0.05\n", "\n", "X_train = pd.read_parquet(Path(data_dir, \"train_set.parquet\"), engine=\"fastparquet\")\n", "X_train = X_train.sample(frac=frac, random_state=42)\n", "\n", - "y_train = X_train[\"buy_sell\"] # here: y = 0\n", + "y_train = X_train[\"buy_sell\"] # here: y = 0\n", "X_train = X_train[features_classical_size]" ] }, @@ -91,13 +87,12 @@ "outputs": [], "source": [ "def gen_perm(X):\n", - " \"\"\"\n", - " Generate index permutation.\n", - " \"\"\"\n", + " \"\"\"Generate index permutation.\"\"\"\n", " if X is None:\n", " return None\n", " return torch.randint_like(X, X.shape[0], dtype=torch.long)\n", "\n", + "\n", "x_cont_perm = gen_perm(x_cont)\n", "x_cat_perm = gen_perm(x_cat)" ] @@ -108,14 +103,13 @@ "metadata": {}, "outputs": [], "source": [ - "def gen_masks(X, perm, corrupt_probability = 0.15):\n", - " \"\"\"\n", - " Generate binary mask for detection.\n", - " \"\"\"\n", + "def gen_masks(X, perm, corrupt_probability=0.15):\n", + " \"\"\"Generate binary mask for detection.\"\"\"\n", " masks = torch.empty_like(X).bernoulli(p=corrupt_probability).bool()\n", - " new_masks = masks & (X != X[perm, torch.arange(X.shape[1], device=X.device)])\n", + " new_masks = masks & (X[perm, torch.arange(X.shape[1], device=X.device)] != X)\n", " return new_masks\n", "\n", + "\n", "# generate masks for numeric and for categorical features (optional)\n", "x_cont_mask = gen_masks(training_data.x_cont, x_cont_perm)\n", "\n", @@ -138,10 +132,9 @@ "x_cont[x_cont_mask] = x_cont_permuted[x_cont_mask]\n", "\n", "if x_cat is not None:\n", - "\n", " # along the 0 axis get elements based on perm_cat\n", " x_cat_permuted = torch.gather(x_cat, 0, x_cat_perm)\n", - " \n", + "\n", " # replace at mask\n", " x_cat[x_cat_mask] = x_cat_permuted[x_cat_mask]" ] @@ -166,7 +159,7 @@ "outputs": [], "source": [ "# split up into train (first 80 %) and val (last 20 %)\n", - "idx = int (len(x_cont) * 0.8)\n", + "idx = int(len(x_cont) * 0.8)\n", "\n", "x_cont_train, x_cont_val = torch.split(x_cont, idx, dim=0)\n", "masks_train, masks_val = torch.split(masks, idx, dim=0)\n", @@ -174,7 +167,7 @@ "if x_cat is not None:\n", " x_cat_train, x_cat_val = torch.split(x_cat, idx, dim=0)\n", "else:\n", - " x_cat_train, x_cat_val = None, None\n" + " x_cat_train, x_cat_val = None, None" ] }, { @@ -244,7 +237,7 @@ " \"n_tokens\": None,\n", " \"kv_compression_ratio\": None,\n", " \"kv_compression_sharing\": None,\n", - " \"head_activation\": nn.GELU, # nn.ReLU\n", + " \"head_activation\": nn.GELU, # nn.ReLU\n", " \"head_normalization\": nn.LayerNorm,\n", " \"d_out\": 1, # fix at 1, due to binary classification\n", "}\n", @@ -254,8 +247,8 @@ "optim_params = {\"lr\": 1e-4, \"weight_decay\": 0.00001}\n", "\n", "module_params = {\n", - " \"transformer\": Transformer(**transformer_kwargs), \n", - " \"feature_tokenizer\": FeatureTokenizer(**feature_tokenizer_kwargs), # noqa: E501\n", + " \"transformer\": Transformer(**transformer_kwargs),\n", + " \"feature_tokenizer\": FeatureTokenizer(**feature_tokenizer_kwargs),\n", " \"cat_features\": None,\n", " \"cat_cardinalities\": [],\n", "}\n", @@ -268,7 +261,7 @@ "clf_head = CLSHead(**head_kwargs)\n", "clf.transformer.head = clf_head\n", "\n", - "clf.to(device)\n" + "clf.to(device)" ] }, { @@ -277,19 +270,9 @@ "metadata": {}, "outputs": [], "source": [ - "train_loader = 
TabDataLoader(\n", - " x_cat_train,\n", - " x_cont_train,\n", - " masks_train, \n", - " **dl_params\n", - ")\n", + "train_loader = TabDataLoader(x_cat_train, x_cont_train, masks_train, **dl_params)\n", "\n", - "val_loader = TabDataLoader(\n", - " x_cat_val,\n", - " x_cont_val,\n", - " masks_val, \n", - " **dl_params\n", - ")" + "val_loader = TabDataLoader(x_cat_val, x_cont_val, masks_val, **dl_params)" ] }, { @@ -298,7 +281,8 @@ "metadata": {}, "outputs": [], "source": [ - "optimizer = optim.AdamW(clf.parameters(),\n", + "optimizer = optim.AdamW(\n", + " clf.parameters(),\n", " lr=optim_params[\"lr\"],\n", " weight_decay=optim_params[\"weight_decay\"],\n", ")\n", @@ -309,7 +293,9 @@ "print(f\"warmup steps: {warmup}\")\n", "print(max_iters)\n", "\n", - "scheduler = CosineWarmupScheduler(optimizer=optimizer, warmup=warmup, max_iters=max_iters)" + "scheduler = CosineWarmupScheduler(\n", + " optimizer=optimizer, warmup=warmup, max_iters=max_iters\n", + ")" ] }, { @@ -319,18 +305,17 @@ "outputs": [], "source": [ "def checkpoint(model):\n", - " \n", " # remove old files\n", " for fn in glob.glob(f\"checkpoints/{run.id}*\"):\n", - " os.remove(fn) \n", - " \n", + " os.remove(fn)\n", + "\n", " # create_dir\n", " dir_checkpoints = \"checkpoints/\"\n", - " os.makedirs(dir_checkpoints, exist_ok = True) \n", - " \n", + " os.makedirs(dir_checkpoints, exist_ok=True)\n", + "\n", " # save new file\n", " print(\"saving new checkpoints.\")\n", - " torch.save(model.state_dict(), os.path.join(dir_checkpoints,f\"{run.id}*\"))" + " torch.save(model.state_dict(), os.path.join(dir_checkpoints, f\"{run.id}*\"))" ] }, { @@ -349,75 +334,74 @@ "best_step = -1\n", "\n", "for epoch in tqdm(range(epochs)):\n", - "\n", " # perform training\n", " loss_in_epoch_train = 0\n", "\n", " batch = 0\n", - " \n", + "\n", " for x_cat, x_cont, masks in train_loader:\n", - " \n", " clf.train()\n", " optimizer.zero_grad()\n", - " \n", - " with torch.autocast(device_type='cuda', dtype=torch.float16):\n", + "\n", + " with torch.autocast(device_type=\"cuda\", dtype=torch.float16):\n", " logits = clf(x_cat, x_cont)\n", " train_loss = criterion(logits, masks.float())\n", "\n", " scaler.scale(train_loss).backward()\n", " scaler.step(optimizer)\n", " scaler.update()\n", - " \n", + "\n", " scheduler.step()\n", - " \n", + "\n", " # add the mini-batch training loss to epoch loss\n", " loss_in_epoch_train += train_loss.item()\n", - " \n", - " wandb.log({\"train_loss_step\": train_loss.item(), \"epoch\": epoch, \"batch\": batch})\n", + "\n", + " wandb.log(\n", + " {\"train_loss_step\": train_loss.item(), \"epoch\": epoch, \"batch\": batch}\n", + " )\n", "\n", " batch += 1\n", - " step +=1\n", + " step += 1\n", "\n", " clf.eval()\n", " loss_in_epoch_val = 0.0\n", " correct = 0\n", - " \n", + "\n", " with torch.no_grad():\n", " for x_cat, x_cont, masks in val_loader:\n", - "\n", " # for my implementation\n", " logits = clf(x_cat, x_cont)\n", " val_loss = criterion(logits, masks.float())\n", - " \n", "\n", " # hard_predictions = torch.zeros_like(logits, dtype=torch.long)\n", " # hard_predictions[logits > 0] = 1\n", " # correct += (hard_predictions.bool() == masks).sum() / hard_predictions.shape[0]\n", "\n", " loss_in_epoch_val += val_loss.item()\n", - " wandb.log({\"val_loss_step\": val_loss.item(), \"epoch\": epoch, \"batch\": batch})\n", - " \n", - " batch +=1 \n", + " wandb.log(\n", + " {\"val_loss_step\": val_loss.item(), \"epoch\": epoch, \"batch\": batch}\n", + " )\n", + "\n", + " batch += 1\n", "\n", " # correct / (rows * columns)\n", - " # 
val_accuracy = correct / (X_train.shape[0] * X_train.shape[1]) \n", - " \n", + " # val_accuracy = correct / (X_train.shape[0] * X_train.shape[1])\n", + "\n", " # loss average over all batches\n", " train_loss = loss_in_epoch_train / len(train_loader)\n", " val_loss = loss_in_epoch_val / len(val_loader)\n", - " \n", + "\n", " print(f\"train loss: {train_loss}\")\n", " print(f\"val loss: {val_loss}\")\n", - " \n", + "\n", " # correct samples / no samples\n", " # val_accuracy = correct / len(X_val)\n", " # if best_accuracy < val_accuracy:\n", " # checkpoint(clf, f\"checkpoints/{run.id}-{step}.ptx\")\n", " # best_accuracy = val_accuracy\n", " # best_step = step\n", - " \n", - " \n", - " wandb.log({\"train_loss\": train_loss, 'epoch': epoch})" + "\n", + " wandb.log({\"train_loss\": train_loss, \"epoch\": epoch})" ] }, { diff --git a/notebooks/5.0a-mb-batch-size-finder.ipynb b/notebooks/5.0a-mb-batch-size-finder.ipynb index 40b478e1..362d159b 100644 --- a/notebooks/5.0a-mb-batch-size-finder.ipynb +++ b/notebooks/5.0a-mb-batch-size-finder.ipynb @@ -6,12 +6,14 @@ "metadata": {}, "outputs": [], "source": [ + "import os\n", "from time import sleep\n", "from typing import Optional\n", "\n", "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", + "\n", "from otc.data.dataloader import TabDataLoader\n", "from otc.models.activation import ReGLU\n", "from otc.models.fttransformer import (\n", @@ -20,9 +22,7 @@ " Transformer,\n", ")\n", "\n", - "import os\n", - "\n", - "os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\"\n" + "os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\"" ] }, { @@ -111,7 +111,6 @@ "\n", "\n", "def get_datasets(batch_size: int, num_workers: int = 2):\n", - "\n", " x_cat = torch.randint(0, CAT_CARDINALITY, (DATASET_SIZE, NUM_FEATURES_CAT))\n", " x_cont = torch.rand((DATASET_SIZE, NUM_FEATURES_CONT))\n", " weight = torch.ones((DATASET_SIZE, 1))\n", @@ -195,7 +194,7 @@ "\n", "\n", "if __name__ == \"__main__\":\n", - " main()\n" + " main()" ] }, { diff --git a/notebooks/6.0a-mb-results-fttransformer.ipynb b/notebooks/6.0a-mb-results-fttransformer.ipynb index f456233a..ff79bdf9 100644 --- a/notebooks/6.0a-mb-results-fttransformer.ipynb +++ b/notebooks/6.0a-mb-results-fttransformer.ipynb @@ -11,16 +11,14 @@ "outputs": [], "source": [ "import os\n", - "import sys\n", "import pickle\n", + "import sys\n", "from pathlib import Path\n", "\n", - "import google.auth\n", "import gcsfs\n", - "\n", + "import google.auth\n", "import pandas as pd\n", "import wandb\n", - "\n", "from tqdm.auto import tqdm\n", "\n", "sys.path.append(\"..\")\n", @@ -29,7 +27,7 @@ " features_classical,\n", " features_classical_size,\n", " features_ml,\n", - ")\n" + ")" ] }, { @@ -41,7 +39,7 @@ "outputs": [], "source": [ "# set globally here\n", - "EXCHANGE = \"cboe\" # \"ise\" # \"cboe\"\n", + "EXCHANGE = \"cboe\" # \"ise\" # \"cboe\"\n", "STRATEGY = \"transfer\" # \"supervised\"\n", "SUBSET = \"test\" # \"all\"\n", "\n", @@ -49,7 +47,7 @@ "# ise-trained models, supervised/semisupervised\n", "models = [\n", " (\"classical\", \"3jpe46s1_TransformerClassifier_default.pkl:latest\"),\n", - " (\"classical-size\", \"1qx3ul4j_TransformerClassifier_default.pkl:latest\"), \n", + " (\"classical-size\", \"1qx3ul4j_TransformerClassifier_default.pkl:latest\"),\n", " (\"ml\", \"2h81aiow_TransformerClassifier_default.pkl:latest\"),\n", "]" ] @@ -64,7 +62,7 @@ "source": [ "# key used for files and artefacts\n", "key = f\"{EXCHANGE}_fttransformer_{STRATEGY}_{SUBSET}\"\n", - "dataset = 
f\"fbv/thesis/{EXCHANGE}_{STRATEGY}_log_standardized_clipped:latest\"\n" + "dataset = f\"fbv/thesis/{EXCHANGE}_{STRATEGY}_log_standardized_clipped:latest\"" ] }, { @@ -99,7 +97,7 @@ "run = wandb.init(project=\"thesis\", entity=\"fbv\")\n", "\n", "artifact = run.use_artifact(dataset)\n", - "data_dir = artifact.download()\n" + "data_dir = artifact.download()" ] }, { @@ -121,7 +119,7 @@ " data = pd.read_parquet(Path(data_dir, \"test_set.parquet\"), engine=\"fastparquet\")\n", "\n", "y_test = data[\"buy_sell\"]\n", - "X_test = data.drop(columns=\"buy_sell\")\n" + "X_test = data.drop(columns=\"buy_sell\")" ] }, { @@ -153,26 +151,24 @@ "outputs": [], "source": [ "def count_parameters(model):\n", - " \"\"\"\n", - " Count number of parameters, that require gradient-update in model.\n", - " \n", + " \"\"\"Count number of parameters, that require gradient-update in model.\n", + "\n", " Found here: https://discuss.pytorch.org/t/how-do-i-check-the-number-of-parameters-of-a-model/4325/9\n", " \"\"\"\n", " return sum(p.numel() for p in model.parameters() if p.requires_grad)\n", "\n", - "for feature_str, model in tqdm(models):\n", "\n", + "for feature_str, model in tqdm(models):\n", " model_name = model.split(\"/\")[-1].split(\":\")[0]\n", "\n", " artifact = run.use_artifact(model)\n", " model_dir = artifact.download()\n", - " \n", - " with open(Path(model_dir, model_name), 'rb') as f:\n", + "\n", + " with open(Path(model_dir, model_name), \"rb\") as f:\n", " model = pickle.load(f)\n", - " \n", + "\n", " print(feature_str)\n", - " print(count_parameters(model.clf))\n", - " " + " print(count_parameters(model.clf))" ] }, { @@ -193,19 +189,18 @@ "}\n", "\n", "for feature_str, model in tqdm(models):\n", - "\n", " model_name = model.split(\"/\")[-1].split(\":\")[0]\n", "\n", " artifact = run.use_artifact(model)\n", " model_dir = artifact.download()\n", - " \n", - " with open(Path(model_dir, model_name), 'rb') as f:\n", + "\n", + " with open(Path(model_dir, model_name), \"rb\") as f:\n", " model = pickle.load(f)\n", "\n", " fs = FEATURE_MAP.get(feature_str)\n", " # filter categorical features that are in subset and get cardinality\n", " cat_features_sub = [tup[0] for tup in features_categorical if tup[0] in fs]\n", - " \n", + "\n", " result = pd.Series(\n", " data=model.predict(X_test.loc[:, fs]),\n", " index=X_test.index,\n", @@ -233,7 +228,7 @@ "result_set.add_reference(output_path, name=\"results\")\n", "run.log_artifact(result_set)\n", "\n", - "wandb.finish()\n" + "wandb.finish()" ] }, { @@ -257,17 +252,17 @@ "artifact = run.use_artifact(models[-1][-1])\n", "model_dir = artifact.download()\n", "\n", - "with open(Path(model_dir, model_name), 'rb') as f:\n", + "with open(Path(model_dir, model_name), \"rb\") as f:\n", " model = pickle.load(f)\n", - " \n", - "key = model_name.split(\".\")[0] + \"-embedding.ptx\"\n", + "\n", + "key = model_name.split(\".\")[0] + \"-embedding.ptx\"\n", "\n", "\n", "uri_embedding = f\"gs://thesis-bucket-option-trade-classification/data/results/{key}\"\n", "embeddings = model.clf.feature_tokenizer.cat_tokenizer.embeddings\n", "with fs.open(uri_embedding, \"wb\") as f:\n", " pickle.dump(embeddings, f, protocol=4)\n", - " \n", + "\n", "result_set = wandb.Artifact(name=key, type=\"results\")\n", "result_set.add_reference(uri_embedding, name=\"results\")" ] diff --git a/notebooks/6.0b-mb-results-classical-rules.ipynb b/notebooks/6.0b-mb-results-classical-rules.ipynb index b2bc11b4..f8f5494c 100644 --- a/notebooks/6.0b-mb-results-classical-rules.ipynb +++ 
b/notebooks/6.0b-mb-results-classical-rules.ipynb @@ -18,7 +18,7 @@ "\n", "sys.path.append(\"..\")\n", "from otc.features.build_features import features_classical_size\n", - "from otc.models.classical_classifier import ClassicalClassifier\n" + "from otc.models.classical_classifier import ClassicalClassifier" ] }, { @@ -32,10 +32,10 @@ "# set here globally\n", "seed = 42\n", "\n", - "exchange = \"ise\" # \"cboe\"\n", + "exchange = \"ise\" # \"cboe\"\n", "models = \"classical\"\n", - "subset = \"test\" # \"test\" # \"all\" # \"test\"\n", - "strategy = \"supervised\" # \"transfer\"\n" + "subset = \"test\" # \"test\" # \"all\" # \"test\"\n", + "strategy = \"supervised\" # \"transfer\"" ] }, { @@ -49,7 +49,7 @@ "# key used for files and artefacts\n", "key = f\"{exchange}_{models}_{strategy}_{subset}\"\n", "\n", - "dataset = f\"fbv/thesis/{exchange}_{strategy}_none:latest\"\n" + "dataset = f\"fbv/thesis/{exchange}_{strategy}_none:latest\"" ] }, { @@ -65,7 +65,7 @@ "\n", "# load unscaled data\n", "artifact = run.use_artifact(dataset)\n", - "data_dir = artifact.download()\n" + "data_dir = artifact.download()" ] }, { @@ -79,7 +79,7 @@ "columns = [\n", " *features_classical_size,\n", " \"buy_sell\",\n", - "]\n" + "]" ] }, { @@ -106,8 +106,8 @@ "elif subset == \"val\":\n", " data = pd.read_parquet(\n", " Path(data_dir, \"val_set.parquet\"), engine=\"fastparquet\", columns=columns\n", - " ) \n", - " \n", + " )\n", + "\n", "elif subset == \"test\":\n", " data = pd.read_parquet(\n", " Path(data_dir, \"test_set.parquet\"), engine=\"fastparquet\", columns=columns\n", @@ -116,10 +116,12 @@ "y_test = data[\"buy_sell\"].astype(\"int8\")\n", "\n", "\n", - "data[\"TRADE_SIZE\"] = data[\"TRADE_SIZE\"].astype('float32') # update dtype Int64 - Float32\n", + "data[\"TRADE_SIZE\"] = data[\"TRADE_SIZE\"].astype(\n", + " \"float32\"\n", + ") # update dtype Int64 - Float32\n", "X_test = data.drop(columns=\"buy_sell\")\n", "\n", - "del data\n" + "del data" ] }, { @@ -130,7 +132,7 @@ }, "outputs": [], "source": [ - "rules = [ #classical\n", + "rules = [ # classical\n", " [(\"tick\", \"ex\")],\n", " [(\"rev_tick\", \"ex\")],\n", " [(\"tick\", \"all\")],\n", @@ -149,7 +151,7 @@ " [(\"rev_emo\", \"best\")],\n", " [(\"clnv\", \"best\")],\n", " [(\"rev_clnv\", \"best\")],\n", - " [(\"quote\", \"best\"), (\"quote\", \"ex\"), (\"rev_tick\", \"all\")], # grauer (benchmark 1)\n", + " [(\"quote\", \"best\"), (\"quote\", \"ex\"), (\"rev_tick\", \"all\")], # grauer (benchmark 1)\n", " [\n", " (\"trade_size\", \"ex\"),\n", " (\"quote\", \"best\"),\n", @@ -157,14 +159,14 @@ " (\"depth\", \"best\"),\n", " (\"depth\", \"ex\"),\n", " (\"rev_tick\", \"all\"),\n", - " ], # grauer (benchmark 2) \n", + " ], # grauer (benchmark 2)\n", "]\n", "\n", "# generate names for array\n", "names = []\n", "for r in tqdm(rules):\n", " name = \"->\".join(\"%s(%s)\" % tup for tup in r)\n", - " names.append(name)\n" + " names.append(name)" ] }, { @@ -182,7 +184,7 @@ " # fit is only used to set sklearn attributes, no leakage\n", " clf.fit(X=X_test.head(5), y=y_test.head(5))\n", " result = clf.predict(X_test).astype(int)\n", - " results.append(result)\n" + " results.append(result)" ] }, { @@ -191,7 +193,7 @@ "metadata": {}, "outputs": [], "source": [ - "results = pd.DataFrame(dict(zip(names, results)), index=X_test.index)\n" + "results = pd.DataFrame(dict(zip(names, results)), index=X_test.index)" ] }, { @@ -203,7 +205,7 @@ "output_path = (\n", " f\"gs://thesis-bucket-option-trade-classification/data/results/{key}.parquet\"\n", ")\n", - 
"results.to_parquet(output_path)\n" + "results.to_parquet(output_path)" ] }, { @@ -219,7 +221,7 @@ "result_set.add_reference(output_path, name=\"results\")\n", "run.log_artifact(result_set)\n", "\n", - "wandb.finish()\n" + "wandb.finish()" ] } ], diff --git a/notebooks/6.0c-mb-results-universal.ipynb b/notebooks/6.0c-mb-results-universal.ipynb index f007e6d7..3a80a71e 100644 --- a/notebooks/6.0c-mb-results-universal.ipynb +++ b/notebooks/6.0c-mb-results-universal.ipynb @@ -10,7 +10,6 @@ "outputs": [], "source": [ "import os\n", - "import random\n", "import sys\n", "from pathlib import Path\n", "\n", @@ -22,11 +21,12 @@ "import warnings\n", "\n", "import wandb\n", - "from otc.metrics.metrics import effective_spread\n", + "from numpy.exceptions import VisibleDeprecationWarning\n", "from scipy.stats import wilcoxon\n", + "from statsmodels.stats.contingency_tables import mcnemar\n", "from tqdm.auto import tqdm\n", "\n", - "from statsmodels.stats.contingency_tables import mcnemar" + "from otc.metrics.metrics import effective_spread" ] }, { @@ -38,10 +38,10 @@ "outputs": [], "source": [ "# set here globally\n", - "EXCHANGE = \"cboe\" # \"ise\"\n", - "MODELS = [\"gbm\",\"fttransformer\"] # \"classical\", \"fttransformer\", \"gbm\"\n", + "EXCHANGE = \"cboe\" # \"ise\"\n", + "MODELS = [\"gbm\", \"fttransformer\"] # \"classical\", \"fttransformer\", \"gbm\"\n", "SUBSET = \"test\" # \"all\"\n", - "STRATEGY = \"transfer\" # \"supervised\" \n", + "STRATEGY = \"transfer\" # \"supervised\"\n", "\n", "RETRAIN = False" ] @@ -62,7 +62,7 @@ "run = wandb.init(project=\"thesis\", entity=\"fbv\")\n", "\n", "# load unscaled data\n", - "artifact = run.use_artifact(DATASET) \n", + "artifact = run.use_artifact(DATASET)\n", "data_dir = artifact.download()\n", "\n", "# load results\n", @@ -73,9 +73,9 @@ " results = f\"fbv/thesis/{EXCHANGE}_{model}_{STRATEGY}_{SUBSET}_retrain:latest\"\n", " else:\n", " results = f\"fbv/thesis/{EXCHANGE}_{model}_{STRATEGY}_{SUBSET}:latest\"\n", - " artifact = run.use_artifact(results) \n", + " artifact = run.use_artifact(results)\n", " result_dir = artifact.download()\n", - " result_dirs.append(result_dir)\n" + " result_dirs.append(result_dir)" ] }, { @@ -135,7 +135,7 @@ "\n", "X_print = eval_data\n", "\n", - "del results\n" + "del results" ] }, { @@ -151,10 +151,10 @@ " [\n", " # (\"fttransformer\", \"fttransformer(classical)\"),\n", " # (\"fttransformer\", \"fttransformer(classical-size)\"),\n", - " # (\"fttransformer\", \"fttransformer(ml)\"), \n", + " # (\"fttransformer\", \"fttransformer(ml)\"),\n", " (\"fttransformer\", \"fttransformer(semi-classical)\"),\n", " (\"fttransformer\", \"fttransformer(semi-classical-size)\"),\n", - " (\"fttransformer\", \"fttransformer(semi-ml)\"), \n", + " (\"fttransformer\", \"fttransformer(semi-ml)\"),\n", " # (\"gbm\", \"gbm(classical)\"),\n", " # (\"gbm\", \"gbm(classical-size)\"),\n", " # (\"gbm\", \"gbm(ml)\"),\n", @@ -162,10 +162,9 @@ " # # (\"gbm\", \"gbm(classical-size-retraining)\"),\n", " # # (\"gbm\", \"gbm(ml-retraining)\"),\n", " (\"gbm\", \"gbm(semi-classical)\"),\n", - " (\"gbm\",'gbm(semi-classical-size)'),\n", - " (\"gbm\",'gbm(semi-ml)'),\n", - "\n", - "# # viz\n", + " (\"gbm\", \"gbm(semi-classical-size)\"),\n", + " (\"gbm\", \"gbm(semi-ml)\"),\n", + " # # viz\n", " # (\"classical\", \"tick(all)\"),\n", " # (\"classical\", \"quote(best)\"),\n", " # (\"classical\", \"quote(ex)\"),\n", @@ -177,40 +176,35 @@ " # (\"classical\", \"depth(ex)\"),\n", " # (\"classical\", \"depth(best)\"),\n", " # (\"classical\", \"trade_size(ex)\"),\n", 
- "\n", - "# # batch 1 / detailled analysis\n", - "# (\"classical\", \"tick(ex)\"),\n", - "# (\"classical\", \"rev_tick(ex)\"),\n", - "# (\"classical\", \"quote(ex)\"),\n", - "# (\"classical\", \"lr(ex)\"),\n", - "# (\"classical\", \"rev_lr(ex)\"),\n", - "# (\"classical\", \"emo(ex)\"),\n", - "# (\"classical\", \"rev_emo(ex)\"),\n", - " \n", - "# # batch 2\n", - "# (\"classical\", \"clnv(ex)\"),\n", - "# (\"classical\", \"rev_clnv(ex)\"),\n", - "# (\"classical\", \"tick(all)\"),\n", - "# (\"classical\", \"rev_tick(all)\"),\n", - "# (\"classical\", \"quote(best)\"),\n", - "# (\"classical\", \"lr(best)\"),\n", - "# (\"classical\", \"rev_lr(best)\"),\n", - " \n", - "# # batch 3\n", - "# (\"classical\", \"emo(best)\"),\n", - "# (\"classical\", \"rev_emo(best)\"),\n", - "# (\"classical\", \"clnv(best)\"),\n", - "# (\"classical\", \"rev_clnv(best)\"), \n", + " # # batch 1 / detailled analysis\n", + " # (\"classical\", \"tick(ex)\"),\n", + " # (\"classical\", \"rev_tick(ex)\"),\n", + " # (\"classical\", \"quote(ex)\"),\n", + " # (\"classical\", \"lr(ex)\"),\n", + " # (\"classical\", \"rev_lr(ex)\"),\n", + " # (\"classical\", \"emo(ex)\"),\n", + " # (\"classical\", \"rev_emo(ex)\"),\n", + " # # batch 2\n", + " # (\"classical\", \"clnv(ex)\"),\n", + " # (\"classical\", \"rev_clnv(ex)\"),\n", + " # (\"classical\", \"tick(all)\"),\n", + " # (\"classical\", \"rev_tick(all)\"),\n", + " # (\"classical\", \"quote(best)\"),\n", + " # (\"classical\", \"lr(best)\"),\n", + " # (\"classical\", \"rev_lr(best)\"),\n", + " # # batch 3\n", + " # (\"classical\", \"emo(best)\"),\n", + " # (\"classical\", \"rev_emo(best)\"),\n", + " # (\"classical\", \"clnv(best)\"),\n", + " # (\"classical\", \"rev_clnv(best)\"),\n", " # (\"classical\", \"quote(best)->quote(ex)->rev_tick(all)\"),\n", " # (\n", " # \"classical\",\n", " # \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\",\n", " # ),\n", - " \n", " # detailed analysis\n", - " \n", " ]\n", - "]\n" + "]" ] }, { @@ -222,7 +216,7 @@ "outputs": [], "source": [ "LUT = {\n", - " \"Trade_Size(ex)->Quote(Best)->Depth(Best)->Quote(Ex)->Depth(Ex)->Rev_Tick(All)\": \"\\gls{GBM}\",\n", + " \"Trade_Size(ex)->Quote(Best)->Depth(Best)->Quote(Ex)->Depth(Ex)->Rev_Tick(All)\": r\"\\gls{GBM}\",\n", " \"(Ex)\": \" (Ex)\",\n", " \"(Best)\": \" (Best)\",\n", " \"(Classical)\": \" (Classical)\",\n", @@ -231,12 +225,12 @@ " \"Trade_Size\": \"Trade Size\",\n", " \"Depth\": \"Depth\",\n", " \"->\": \" $\\\\to$ \",\n", - " \"Lr\": \"\\gls{LR}\",\n", - " \"Emo\": \"\\gls{EMO}\",\n", - " \"Clnv\": \"\\gls{CLNV}\",\n", + " \"Lr\": r\"\\gls{LR}\",\n", + " \"Emo\": r\"\\gls{EMO}\",\n", + " \"Clnv\": r\"\\gls{CLNV}\",\n", " \"OPTION_TYPE\": \"Option Type\",\n", - " \"_\": \"$\\_\",\n", - " \"Gbm\": \"\\gls{GBM}\",\n", + " \"_\": r\"$\\_\",\n", + " \"Gbm\": r\"\\gls{GBM}\",\n", "}\n", "\n", "LUT_INDEX = {\n", @@ -260,7 +254,7 @@ "\n", "\n", "def highlight_max(s, props=\"\"):\n", - " return np.where(s == np.nanmax(s.values), props, \"\")\n" + " return np.where(s == np.nanmax(s.values), props, \"\")" ] }, { @@ -290,7 +284,7 @@ " convert_css=True,\n", " )\n", " )\n", - " return res\n" + " return res" ] }, { @@ -302,7 +296,7 @@ "outputs": [], "source": [ "classifiers = results_data.columns.tolist()\n", - "criterions = list(LUT_INDEX)\n" + "criterions = list(LUT_INDEX)" ] }, { @@ -355,7 +349,7 @@ " label=f\"{KEY.lower()}-unclassfied\",\n", " bold_axis=0,\n", ")\n", - "unclassified\n" + "unclassified" ] }, { @@ -381,14 +375,14 @@ "results_data.replace(0, np.nan, 
inplace=True)\n", "# assume same filler for every column\n", "filler = pd.Series(\n", - " rng.choice(a=[-1, 1], size=results_data.shape[0]),\n", - " index=results_data.index,\n", - " # columns=results_data.columns,\n", + " rng.choice(a=[-1, 1], size=results_data.shape[0]),\n", + " index=results_data.index,\n", + " # columns=results_data.columns,\n", ")\n", "\n", "# do column-wise as we run out of memory otherwise\n", "for classifier in tqdm(classifiers):\n", - " results_data[classifier].fillna(filler, inplace=True)\n" + " results_data[classifier].fillna(filler, inplace=True)" ] }, { @@ -404,40 +398,64 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "X_print = pd.concat([X_print, results_data], axis=1)\n", "Y = X_print[[*results_data.columns.tolist(), \"buy_sell\"]]\n", "\n", - "import statsmodels\n", - "from statsmodels.stats.contingency_tables import mcnemar\n", "\n", "results = []\n", - "combinations = [(('gbm', 'gbm(semi-classical)'), ('fttransformer', 'fttransformer(semi-classical)')),\n", - " (('gbm', 'gbm(semi-classical-size)'), ('fttransformer', 'fttransformer(semi-classical-size)')),\n", - " (('gbm', 'gbm(semi-ml)'), ('fttransformer', 'fttransformer(semi-ml)'))]\n", + "combinations = [\n", + " (\n", + " (\"gbm\", \"gbm(semi-classical)\"),\n", + " (\"fttransformer\", \"fttransformer(semi-classical)\"),\n", + " ),\n", + " (\n", + " (\"gbm\", \"gbm(semi-classical-size)\"),\n", + " (\"fttransformer\", \"fttransformer(semi-classical-size)\"),\n", + " ),\n", + " ((\"gbm\", \"gbm(semi-ml)\"), (\"fttransformer\", \"fttransformer(semi-ml)\")),\n", + "]\n", "significance = 0.05\n", "\n", - "def get_contingency_table(Y, ground_truth, model_1, model_2):\n", "\n", + "def get_contingency_table(Y, ground_truth, model_1, model_2):\n", " Y_ = Y[[ground_truth, model_1, model_2]].copy().astype(int)\n", "\n", - " c_0_0 = np.where((Y_[model_1] == Y_[ground_truth]) & (Y_[model_2] == Y_[ground_truth]), 1, 0).sum()\n", - " c_0_1 = np.where((Y_[model_1] == Y_[ground_truth]) & (Y_[model_2] != Y_[ground_truth]), 1, 0).sum()\n", - " c_1_0 = np.where((Y_[model_1] != Y_[ground_truth]) & (Y_[model_2] == Y_[ground_truth]), 1, 0).sum()\n", - " c_1_1 = np.where((Y_[model_1] != Y_[ground_truth]) & (Y_[model_2] != Y_[ground_truth]), 1, 0).sum()\n", - " \n", + " c_0_0 = np.where(\n", + " (Y_[model_1] == Y_[ground_truth]) & (Y_[model_2] == Y_[ground_truth]), 1, 0\n", + " ).sum()\n", + " c_0_1 = np.where(\n", + " (Y_[model_1] == Y_[ground_truth]) & (Y_[model_2] != Y_[ground_truth]), 1, 0\n", + " ).sum()\n", + " c_1_0 = np.where(\n", + " (Y_[model_1] != Y_[ground_truth]) & (Y_[model_2] == Y_[ground_truth]), 1, 0\n", + " ).sum()\n", + " c_1_1 = np.where(\n", + " (Y_[model_1] != Y_[ground_truth]) & (Y_[model_2] != Y_[ground_truth]), 1, 0\n", + " ).sum()\n", + "\n", " # [both right, gbm right/transformer wrong, gbm wrong/transformer right, both wrong]\n", - " contingency_table = [[c_0_0, c_0_1],[c_1_0, c_1_1]]\n", + " contingency_table = [[c_0_0, c_0_1], [c_1_0, c_1_1]]\n", "\n", " return np.array(contingency_table)\n", "\n", - " \n", + "\n", "for combination in tqdm(combinations):\n", - " contingency_table = get_contingency_table(Y, 'buy_sell', combination[0], combination[1])\n", + " contingency_table = get_contingency_table(\n", + " Y, \"buy_sell\", combination[0], combination[1]\n", + " )\n", " test = mcnemar(contingency_table, exact=False, correction=True)\n", - " \n", - " results.append({\"contingency_table\": contingency_table, \"model_1\": combination[0], \"model_2\": combination[1], \"statistic\": 
test.statistic, \"p-value\": test.pvalue, \"significant\": test.pvalue < significance})\n", - " \n", + "\n", + " results.append(\n", + " {\n", + " \"contingency_table\": contingency_table,\n", + " \"model_1\": combination[0],\n", + " \"model_2\": combination[1],\n", + " \"statistic\": test.statistic,\n", + " \"p-value\": test.pvalue,\n", + " \"significant\": test.pvalue < significance,\n", + " }\n", + " )\n", + "\n", "pd.DataFrame(results).to_csv(f\"../models/{EXCHANGE}-mcnemar.csv\")" ] }, @@ -576,7 +594,7 @@ " \"year\",\n", " ],\n", " inplace=True,\n", - ")\n" + ")" ] }, { @@ -587,7 +605,7 @@ }, "outputs": [], "source": [ - "X_print = pd.concat([X_print, results_data], axis=1)\n" + "X_print = pd.concat([X_print, results_data], axis=1)" ] }, { @@ -617,7 +635,7 @@ "outputs": [], "source": [ "# FIXME: Find better approach\n", - "warnings.filterwarnings(\"ignore\", category=np.VisibleDeprecationWarning)\n", + "warnings.filterwarnings(\"ignore\", category=VisibleDeprecationWarning)\n", "\n", "result_dfs = []\n", "\n", @@ -650,7 +668,7 @@ " # )\n", "\n", " # store all result sets for later use\n", - " result_dfs.append(result_df)\n" + " result_dfs.append(result_df)" ] }, { @@ -690,7 +708,7 @@ " caption=(\"master-long\", \"master-short\"),\n", " label=f\"{KEY}-master\",\n", " bold_axis=0,\n", - ")\n" + ")" ] }, { @@ -730,7 +748,9 @@ "results = []\n", "\n", "# calculate true rel effective spread but not aggregated, convert to %\n", - "es_true = effective_spread(X_print[\"buy_sell\"], X_print[\"TRADE_PRICE\"], mid, mode=\"none\")\n", + "es_true = effective_spread(\n", + " X_print[\"buy_sell\"], X_print[\"TRADE_PRICE\"], mid, mode=\"none\"\n", + ")\n", "nom_true = np.nanmean(es_true)\n", "\n", "eps_true = np.empty(es_true.shape)\n", @@ -739,26 +759,31 @@ "\n", "\n", "for classifier in tqdm(classifiers):\n", - "\n", " # calculate pred rel effective spread but not aggregated convert to %\n", - " es_pred = effective_spread(X_print[classifier], X_print[\"TRADE_PRICE\"], mid, mode=\"none\")\n", - " \n", + " es_pred = effective_spread(\n", + " X_print[classifier], X_print[\"TRADE_PRICE\"], mid, mode=\"none\"\n", + " )\n", + "\n", " eps_pred = np.empty(es_pred.shape)\n", " np.divide(es_pred, mid, out=eps_pred, where=mid != 0)\n", "\n", - " wilcoxon_res = wilcoxon(eps_pred, eps_true, nan_policy=\"omit\", zero_method=\"zsplit\")\n", + " wilcoxon_res = wilcoxon(eps_pred, eps_true, nan_policy=\"omit\", zero_method=\"zsplit\")\n", "\n", " res = pd.Series(\n", - " {\n", - " \"nom_pred\": np.nanmean(es_pred),\n", - " \"rel_pred\": np.nanmean(eps_pred),\n", - " \"statistic\":wilcoxon_res.statistic,\n", - " \"pvalue\":wilcoxon_res.pvalue,\n", - " }, name=classifier\n", - " )\n", + " {\n", + " \"nom_pred\": np.nanmean(es_pred),\n", + " \"rel_pred\": np.nanmean(eps_pred),\n", + " \"statistic\": wilcoxon_res.statistic,\n", + " \"pvalue\": wilcoxon_res.pvalue,\n", + " },\n", + " name=classifier,\n", + " )\n", " results.append(res)\n", "\n", - "true_eff = pd.Series({\"nom_pred\":nom_true, \"rel_pred\": rel_true, \"statistic\":np.NaN, \"pvalue\":np.NaN}, name=\"true_eff\")\n", + "true_eff = pd.Series(\n", + " {\"nom_pred\": nom_true, \"rel_pred\": rel_true, \"statistic\": np.nan, \"pvalue\": np.nan},\n", + " name=\"true_eff\",\n", + ")\n", "\n", "results.append(true_eff)\n", "\n", @@ -773,7 +798,7 @@ }, "outputs": [], "source": [ - "results.T.style.format(\"{:.3f}\")\n" + "results.T.style.format(\"{:.3f}\")" ] }, { @@ -791,7 +816,7 @@ " label=f\"tab:eff-{KEY}\",\n", " caption=(f\"long-eff-{KEY}\", 
f\"short-eff-{KEY}\"),\n", " convert_css=True,\n", - ")\n" + ")" ] }, { @@ -821,7 +846,11 @@ "outputs": [], "source": [ "# classical baselines\n", - "view = [(\"fttransformer\", \"fttransformer(semi-classical)\"), (\"fttransformer\", \"fttransformer(semi-classical-size)\"), (\"fttransformer\", \"fttransformer(semi-ml)\")]\n", + "view = [\n", + " (\"fttransformer\", \"fttransformer(semi-classical)\"),\n", + " (\"fttransformer\", \"fttransformer(semi-classical-size)\"),\n", + " (\"fttransformer\", \"fttransformer(semi-ml)\"),\n", + "]\n", "\n", "base = master[\n", " [\n", @@ -838,7 +867,7 @@ "]\n", "\n", "# my ml models\n", - "revised = master[view]\n" + "revised = master[view]" ] }, { @@ -848,8 +877,7 @@ "outputs": [], "source": [ "def combine_results(revised: pd.DataFrame, base: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"\n", - " Generate print layout like in Grauer et al.\n", + " \"\"\"Generate print layout like in Grauer et al.\n", "\n", " https://tex.stackexchange.com/questions/430283/table-with-numbers-in-parentheses-in-siunitx/430290#430290\n", "\n", @@ -864,11 +892,10 @@ " combo = pd.DataFrame(revised.values, index=revised.index, columns=midx)\n", "\n", " for i, mul_col in enumerate(combo.columns):\n", - "\n", " combo[(mul_col[0], \"pm\")] = (combo[mul_col] - base.iloc[:, i]).round(2)\n", " combo.sort_index(axis=1, inplace=True)\n", "\n", - " return combo\n" + " return combo" ] }, { @@ -897,7 +924,7 @@ " label=f\"tab:diff-{KEY}\",\n", " caption=(f\"long-diff-{KEY}\", f\"short-diff-{KEY}\"),\n", " convert_css=True,\n", - ")\n" + ")" ] }, { @@ -908,7 +935,7 @@ }, "outputs": [], "source": [ - "diff\n" + "diff" ] } ], diff --git a/notebooks/6.0d-mb-results-gradient-boosting.ipynb b/notebooks/6.0d-mb-results-gradient-boosting.ipynb index 31fcec96..1c14d52b 100644 --- a/notebooks/6.0d-mb-results-gradient-boosting.ipynb +++ b/notebooks/6.0d-mb-results-gradient-boosting.ipynb @@ -25,7 +25,7 @@ " features_classical,\n", " features_classical_size,\n", " features_ml,\n", - ")\n" + ")" ] }, { @@ -49,7 +49,7 @@ " (\"semi-classical\", \"37lymmzc_CatBoostClassifier_default.cbm:latest\"),\n", " (\"semi-classical-size\", \"1vmti6db_CatBoostClassifier_default.cbm:latest\"),\n", " (\"semi-ml\", \"t55nd8r0_CatBoostClassifier_default.cbm:latest\"),\n", - "]\n" + "]" ] }, { @@ -60,7 +60,7 @@ "source": [ "# key used for files and artefacts\n", "key = f\"{EXCHANGE}_gbm_{STRATEGY}_{SUBSET}\"\n", - "dataset = f\"fbv/thesis/{EXCHANGE}_{STRATEGY}_log_standardized_clipped:latest\"\n" + "dataset = f\"fbv/thesis/{EXCHANGE}_{STRATEGY}_log_standardized_clipped:latest\"" ] }, { @@ -72,7 +72,7 @@ "outputs": [], "source": [ "# set project name. 
Required to access files and artefacts\n", - "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"\n" + "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"" ] }, { @@ -92,7 +92,7 @@ "run = wandb.init(project=\"thesis\", entity=\"fbv\")\n", "\n", "artifact = run.use_artifact(dataset)\n", - "data_dir = artifact.download()\n" + "data_dir = artifact.download()" ] }, { @@ -125,7 +125,7 @@ " X_retrain = retrain_data.drop(columns=\"buy_sell\")\n", "\n", " weight_retrain = np.geomspace(0.001, 1, num=len(y_retrain))\n", - " timestamp_retrain = np.linspace(0, 1, len(y_retrain))\n" + " timestamp_retrain = np.linspace(0, 1, len(y_retrain))" ] }, { @@ -156,7 +156,6 @@ "}\n", "\n", "for feature_str, model in tqdm(models):\n", - "\n", " model_name = model.split(\"/\")[-1].split(\":\")[0]\n", "\n", " artifact = run.use_artifact(model)\n", @@ -198,7 +197,7 @@ " index=X_test.index,\n", " name=f\"gbm({feature_str}-retraining)\",\n", " )\n", - " results.append(result)\n" + " results.append(result)" ] }, { @@ -220,7 +219,7 @@ "result_set.add_reference(output_path, name=\"results\")\n", "run.log_artifact(result_set)\n", "\n", - "wandb.finish()\n" + "wandb.finish()" ] } ], diff --git a/notebooks/6.0e-mb-viz-universal.ipynb b/notebooks/6.0e-mb-viz-universal.ipynb index 9451c4f2..0d5eb7c0 100644 --- a/notebooks/6.0e-mb-viz-universal.ipynb +++ b/notebooks/6.0e-mb-viz-universal.ipynb @@ -9,23 +9,27 @@ }, "outputs": [], "source": [ - "import numpy as np\n", - "import matplotlib as mpl\n", - "import matplotlib.pyplot as plt\n", - "from matplotlib import rc\n", - "import torch\n", - "import pandas as pd\n", - "import matplotlib.dates as mdates\n", - "from matplotlib.dates import DateFormatter\n", - "import matplotlib.ticker as ticker\n", - "from matplotlib.ticker import FormatStrFormatter, StrMethodFormatter, PercentFormatter,MaxNLocator\n", - "\n", "import json\n", "import os\n", "import pickle\n", "from pathlib import Path\n", + "\n", + "import matplotlib as mpl\n", + "import matplotlib.dates as mdates\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.ticker as ticker\n", + "import numpy as np\n", "import optuna\n", + "import pandas as pd\n", + "import torch\n", "import wandb\n", + "from matplotlib import rc\n", + "from matplotlib.dates import DateFormatter\n", + "from matplotlib.ticker import (\n", + " MaxNLocator,\n", + " PercentFormatter,\n", + " StrMethodFormatter,\n", + ")\n", "\n", "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"" ] @@ -51,7 +55,7 @@ "plt.rcParams.update(params)\n", "rc(\"text\", usetex=True)\n", "\n", - "plt.rc('text.latex', preamble=r'\\usepackage{amsmath}\\usepackage[utf8]{inputenc}')\n", + "plt.rc(\"text.latex\", preamble=r\"\\usepackage{amsmath}\\usepackage[utf8]{inputenc}\")\n", "\n", "CM = 1 / 2.54\n", "# cmap = plt.cm.get_cmap(\"viridis\")\n", @@ -73,11 +77,35 @@ "\n", "# line cyclers adapted to colourblind people\n", "from cycler import cycler\n", - "line_cycler = (cycler(color=[\"#E69F00\", \"#56B4E9\", \"#009E73\", \"#0072B2\", \"#D55E00\", \"#CC79A7\", \"#F0E442\"]) # + cycler(linestyle=[\"-\", \"--\", \"-.\", \":\", \"-\", \"--\", \"-.\"])\n", - " )\n", - "marker_cycler = (cycler(color=[\"#E69F00\", \"#56B4E9\", \"#009E73\", \"#0072B2\", \"#D55E00\", \"#CC79A7\", \"#F0E442\"]) +\n", - " cycler(linestyle=[\"none\", \"none\", \"none\", \"none\", \"none\", \"none\", \"none\"]) +\n", - " cycler(marker=[\"4\", \"2\", \"3\", \"1\", \"+\", \"x\", \".\"]))\n", + "\n", + "line_cycler = (\n", + " cycler(\n", + " color=[\n", + " \"#E69F00\",\n", + " 
\"#56B4E9\",\n", + " \"#009E73\",\n", + " \"#0072B2\",\n", + " \"#D55E00\",\n", + " \"#CC79A7\",\n", + " \"#F0E442\",\n", + " ]\n", + " ) # + cycler(linestyle=[\"-\", \"--\", \"-.\", \":\", \"-\", \"--\", \"-.\"])\n", + ")\n", + "marker_cycler = (\n", + " cycler(\n", + " color=[\n", + " \"#E69F00\",\n", + " \"#56B4E9\",\n", + " \"#009E73\",\n", + " \"#0072B2\",\n", + " \"#D55E00\",\n", + " \"#CC79A7\",\n", + " \"#F0E442\",\n", + " ]\n", + " )\n", + " + cycler(linestyle=[\"none\", \"none\", \"none\", \"none\", \"none\", \"none\", \"none\"])\n", + " + cycler(marker=[\"4\", \"2\", \"3\", \"1\", \"+\", \"x\", \".\"])\n", + ")\n", "\n", "plt.rc(\"axes\", prop_cycle=line_cycler)" ] @@ -97,8 +125,12 @@ }, "outputs": [], "source": [ - "accuracies_over_time_ise = pd.read_parquet(\"gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_all-classical-accurcies-over-time.parquet\")\n", - "accuracies_over_time_cboe = pd.read_parquet(\"gs://thesis-bucket-option-trade-classification/data/results/cboe_supervised_all-classical-accurcies-over-time.parquet\")" + "accuracies_over_time_ise = pd.read_parquet(\n", + " \"gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_all-classical-accurcies-over-time.parquet\"\n", + ")\n", + "accuracies_over_time_cboe = pd.read_parquet(\n", + " \"gs://thesis-bucket-option-trade-classification/data/results/cboe_supervised_all-classical-accurcies-over-time.parquet\"\n", + ")" ] }, { @@ -109,35 +141,79 @@ }, "outputs": [], "source": [ - "fig, ax = plt.subplots(2,1,figsize=(14*CM,10*CM), sharey=True, sharex=True, tight_layout=True)\n", + "fig, ax = plt.subplots(\n", + " 2, 1, figsize=(14 * CM, 10 * CM), sharey=True, sharex=True, tight_layout=True\n", + ")\n", "\n", "\n", - "ax[0].plot(accuracies_over_time_ise[\"tick(all)\"], label=\"$\\operatorname{tick}_{\\mathrm{all}}$\", lw=1)\n", - "ax[0].plot(accuracies_over_time_ise[\"quote(best)\"], label=\"$\\operatorname{quote}_{\\mathrm{nbbo}}$\", lw=1, zorder=20)\n", - "ax[0].plot(accuracies_over_time_ise[\"quote(best)->quote(ex)->rev_tick(all)\"], label=r\"$\\operatorname{gsu}_{\\mathrm{small}}$\", lw=1, zorder=50)\n", - "ax[0].plot(accuracies_over_time_ise[\"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\"], label=r\"$\\operatorname{gsu}_{\\mathrm{large}}$\", lw=1, zorder=100)\n", + "ax[0].plot(\n", + " accuracies_over_time_ise[\"tick(all)\"],\n", + " label=r\"$\\operatorname{tick}_{\\mathrm{all}}$\",\n", + " lw=1,\n", + ")\n", + "ax[0].plot(\n", + " accuracies_over_time_ise[\"quote(best)\"],\n", + " label=r\"$\\operatorname{quote}_{\\mathrm{nbbo}}$\",\n", + " lw=1,\n", + " zorder=20,\n", + ")\n", + "ax[0].plot(\n", + " accuracies_over_time_ise[\"quote(best)->quote(ex)->rev_tick(all)\"],\n", + " label=r\"$\\operatorname{gsu}_{\\mathrm{small}}$\",\n", + " lw=1,\n", + " zorder=50,\n", + ")\n", + "ax[0].plot(\n", + " accuracies_over_time_ise[\n", + " \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\"\n", + " ],\n", + " label=r\"$\\operatorname{gsu}_{\\mathrm{large}}$\",\n", + " lw=1,\n", + " zorder=100,\n", + ")\n", "\n", - "ax[0].axvline(x=pd.Timestamp('2013-10-24'), linestyle='--', color='grey', linewidth=0.5)\n", - "ax[0].axvline(x=pd.Timestamp('2015-11-05'), linestyle='--', color='grey', linewidth=0.5)\n", + "ax[0].axvline(x=pd.Timestamp(\"2013-10-24\"), linestyle=\"--\", color=\"grey\", linewidth=0.5)\n", + "ax[0].axvline(x=pd.Timestamp(\"2015-11-05\"), linestyle=\"--\", color=\"grey\", linewidth=0.5)\n", "\n", "# ax[1].s\n", - 
"ax[1].plot(accuracies_over_time_cboe[\"tick(all)\"], label=\"$\\operatorname{tick}_{\\mathrm{all}}$\", lw=1)\n", - "ax[1].plot(accuracies_over_time_cboe[\"quote(best)\"], label=\"$\\operatorname{quote}_{\\mathrm{nbbo}}$\", lw=1, zorder=20)\n", - "ax[1].plot(accuracies_over_time_cboe[\"quote(best)->quote(ex)->rev_tick(all)\"], label=r\"$\\operatorname{gsu}_{\\mathrm{small}}$\", lw=1, zorder=50)\n", - "ax[1].plot(accuracies_over_time_cboe[\"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\"], label=r\"$\\operatorname{gsu}_{\\mathrm{large}}$\", lw=1, zorder=100)\n", + "ax[1].plot(\n", + " accuracies_over_time_cboe[\"tick(all)\"],\n", + " label=r\"$\\operatorname{tick}_{\\mathrm{all}}$\",\n", + " lw=1,\n", + ")\n", + "ax[1].plot(\n", + " accuracies_over_time_cboe[\"quote(best)\"],\n", + " label=r\"$\\operatorname{quote}_{\\mathrm{nbbo}}$\",\n", + " lw=1,\n", + " zorder=20,\n", + ")\n", + "ax[1].plot(\n", + " accuracies_over_time_cboe[\"quote(best)->quote(ex)->rev_tick(all)\"],\n", + " label=r\"$\\operatorname{gsu}_{\\mathrm{small}}$\",\n", + " lw=1,\n", + " zorder=50,\n", + ")\n", + "ax[1].plot(\n", + " accuracies_over_time_cboe[\n", + " \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\"\n", + " ],\n", + " label=r\"$\\operatorname{gsu}_{\\mathrm{large}}$\",\n", + " lw=1,\n", + " zorder=100,\n", + ")\n", "\n", - "ax[1].axvline(x=pd.Timestamp('2015-11-05'), linestyle='--', color='grey', linewidth=0.5)\n", + "ax[1].axvline(x=pd.Timestamp(\"2015-11-05\"), linestyle=\"--\", color=\"grey\", linewidth=0.5)\n", "\n", - "#ax[1].legend(frameon=False, loc=\"lower center\", ncols=2, bbox_to_anchor=(0.5, -1))\n", + "# ax[1].legend(frameon=False, loc=\"lower center\", ncols=2, bbox_to_anchor=(0.5, -1))\n", "\n", "# y-axis\n", "ax[0].set_ylabel(\"Accuracy\")\n", "ax[1].set_ylabel(\"Accuracy\")\n", - "ax[0].set_ylim(0,100)\n", - "ax[0].yaxis.set_major_formatter(PercentFormatter(100.0,decimals=2))\n", + "ax[0].set_ylim(0, 100)\n", + "ax[0].yaxis.set_major_formatter(PercentFormatter(100.0, decimals=2))\n", "\n", "# first ise and last cboe\n", - "ax[0].set_xlim(accuracies_over_time_ise.index[0],accuracies_over_time_cboe.index[-1])\n", + "ax[0].set_xlim(accuracies_over_time_ise.index[0], accuracies_over_time_cboe.index[-1])\n", "\n", "# bins_dt = [pd.Timestamp(\"2000-01-01 00:00:00\"), pd.Timestamp(\"2013-10-24 23:59:00\"), pd.Timestamp(\"2015-11-05 23:59:00\"),pd.Timestamp(\"2099-12-31 23:59:59\")]\n", "# else:\n", @@ -145,10 +221,17 @@ "\n", "handles, labels = ax[1].get_legend_handles_labels()\n", "order = [0, 1, 2, 3]\n", - "ax[1].legend([handles[idx] for idx in order],[labels[idx] for idx in order], frameon=False, loc=\"lower center\", ncols=4, bbox_to_anchor=(0.5, -0.5))\n", + "ax[1].legend(\n", + " [handles[idx] for idx in order],\n", + " [labels[idx] for idx in order],\n", + " frameon=False,\n", + " loc=\"lower center\",\n", + " ncols=4,\n", + " bbox_to_anchor=(0.5, -0.5),\n", + ")\n", "\n", - "ax[0].set_title('ISE')\n", - "ax[1].set_title('CBOE')\n", + "ax[0].set_title(\"ISE\")\n", + "ax[1].set_title(\"CBOE\")\n", "\n", "# x-axis\n", "# ax.set_xlabel(\"Date\")\n", @@ -186,10 +269,10 @@ "\n", "artifact = run.use_artifact(MODEL)\n", "model_dir = artifact.download()\n", - " \n", - "with open(Path(model_dir, model_name), 'rb') as f:\n", + "\n", + "with open(Path(model_dir, model_name), \"rb\") as f:\n", " model = pickle.load(f)\n", - " \n", + "\n", "clf = model.clf" ] }, @@ -201,7 +284,10 @@ }, "outputs": [], "source": [ - "pretrain_data = 
[{'train_loss': d['train_loss'], 'val_loss': d['val_loss'], 'epoch': d['epoch']} for d in model._stats_pretrain_epoch]" + "pretrain_data = [\n", + " {\"train_loss\": d[\"train_loss\"], \"val_loss\": d[\"val_loss\"], \"epoch\": d[\"epoch\"]}\n", + " for d in model._stats_pretrain_epoch\n", + "]" ] }, { @@ -234,22 +320,22 @@ }, "outputs": [], "source": [ - "fig, axes = plt.subplots(1,figsize=(14*CM, 7*CM), sharex=True, sharey=True)\n", + "fig, axes = plt.subplots(1, figsize=(14 * CM, 7 * CM), sharex=True, sharey=True)\n", "\n", "axes.plot(stats_pretrain, lw=1)\n", "axes.set_ylabel(\"BCE Loss\")\n", "\n", "axes.set_xlabel(\"Epoch\")\n", - "axes.set_xlim([0,19])\n", - "#axes[1].plot(stats_exs.iloc[:,[5,6,7,8]], lw=1)\n", + "axes.set_xlim([0, 19])\n", + "# axes[1].plot(stats_exs.iloc[:,[5,6,7,8]], lw=1)\n", "\n", "# axes[0].set_title(\"ISE\")\n", "# axes[1].set_title(\"CBOE\")\n", "\n", - "#axes.set_xlim(pd.to_datetime(\"2005-05-02\"), pd.to_datetime(\"2017-10-31\"))\n", + "# axes.set_xlim(pd.to_datetime(\"2005-05-02\"), pd.to_datetime(\"2017-10-31\"))\n", "# axes.set_ylim([0,20.0])\n", "# axes.yaxis.set_major_formatter(PercentFormatter(100.0,decimals=2))\n", - "#axes.xaxis.set_major_formatter(DateFormatter('%b %Y'))\n", + "# axes.xaxis.set_major_formatter(DateFormatter('%b %Y'))\n", "\n", "# labels = [\"_\",\"_\",\"At Mid (ISE)\", \"At Mid (CBOE)\"]\n", "\n", @@ -257,14 +343,18 @@ "\n", "labels = [\"Loss (Train)\", \"Loss (Val)\"]\n", "\n", - "axes.legend(labels, frameon=False, loc = \"lower center\" ,bbox_to_anchor = (0, -0.7, 1, 1), ncols=2)\n", + "axes.legend(\n", + " labels, frameon=False, loc=\"lower center\", bbox_to_anchor=(0, -0.7, 1, 1), ncols=2\n", + ")\n", "\n", "# axes.legend()\n", "\n", "# axes.legend(labels, frameon=False, loc = \"lower center\",bbox_to_anchor=(0.5, -0.5), ncols=2)\n", "\n", "plt.tight_layout()\n", - "plt.savefig(\"../reports/Graphs/transformer_ise_pretrain_classical.pdf\", bbox_inches=\"tight\")" + "plt.savefig(\n", + " \"../reports/Graphs/transformer_ise_pretrain_classical.pdf\", bbox_inches=\"tight\"\n", + ")" ] }, { @@ -282,8 +372,12 @@ }, "outputs": [], "source": [ - "tsize_ise = pd.read_parquet(\"gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_all-accuracies-tsize-ex.parquet\")\n", - "tsize_cboe = pd.read_parquet(\"gs://thesis-bucket-option-trade-classification/data/results/cboe_supervised_all-accuracies-tsize-ex.parquet\")" + "tsize_ise = pd.read_parquet(\n", + " \"gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_all-accuracies-tsize-ex.parquet\"\n", + ")\n", + "tsize_cboe = pd.read_parquet(\n", + " \"gs://thesis-bucket-option-trade-classification/data/results/cboe_supervised_all-accuracies-tsize-ex.parquet\"\n", + ")" ] }, { @@ -294,8 +388,12 @@ }, "outputs": [], "source": [ - "stats_ise = pd.read_parquet('gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_all-classical-stats-over-time.parquet')\n", - "stats_cboe = pd.read_parquet('gs://thesis-bucket-option-trade-classification/data/results/cboe_supervised_all-classical-stats-over-time.parquet')" + "stats_ise = pd.read_parquet(\n", + " \"gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_all-classical-stats-over-time.parquet\"\n", + ")\n", + "stats_cboe = pd.read_parquet(\n", + " \"gs://thesis-bucket-option-trade-classification/data/results/cboe_supervised_all-classical-stats-over-time.parquet\"\n", + ")" ] }, { @@ -306,7 +404,7 @@ }, "outputs": [], "source": [ - "stats_exs = 
pd.concat([stats_ise,stats_cboe], axis=1)" + "stats_exs = pd.concat([stats_ise, stats_cboe], axis=1)" ] }, { @@ -317,7 +415,7 @@ }, "outputs": [], "source": [ - "tsize_exs = pd.concat([tsize_ise,tsize_cboe], axis=1)" + "tsize_exs = pd.concat([tsize_ise, tsize_cboe], axis=1)" ] }, { @@ -339,33 +437,45 @@ }, "outputs": [], "source": [ - "fig, ax = plt.subplots(2,1,figsize=(14*CM, 6*CM), sharex=True, sharey=True)\n", + "fig, ax = plt.subplots(2, 1, figsize=(14 * CM, 6 * CM), sharex=True, sharey=True)\n", "\n", - "ax[0].axvline(x=pd.to_datetime(\"2005-05-02\"), linestyle='--', color='grey', linewidth=0.5)\n", - "ax[0].axvline(x=pd.to_datetime(\"2013-10-24\"), linestyle='--', color='grey', linewidth=0.5)\n", - "ax[0].axvline(x=pd.to_datetime(\"2015-11-05\"), linestyle='--', color='grey', linewidth=0.5)\n", + "ax[0].axvline(\n", + " x=pd.to_datetime(\"2005-05-02\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", + "ax[0].axvline(\n", + " x=pd.to_datetime(\"2013-10-24\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", + "ax[0].axvline(\n", + " x=pd.to_datetime(\"2015-11-05\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", "\n", - "ax[0].plot(tsize_exs.iloc[:,1], lw=1, label=\"CBOE\")\n", - "ax[0].plot(tsize_exs.iloc[:,0], lw=1, label=\"ISE\")\n", + "ax[0].plot(tsize_exs.iloc[:, 1], lw=1, label=\"CBOE\")\n", + "ax[0].plot(tsize_exs.iloc[:, 0], lw=1, label=\"ISE\")\n", "ax[0].set_ylabel(\"Accuracy\")\n", "ax[0].set_xlabel(None)\n", "\n", - "ax[1].axvline(x=pd.to_datetime(\"2005-05-02\"), linestyle='--', color='grey', linewidth=0.5)\n", - "ax[1].axvline(x=pd.to_datetime(\"2013-10-24\"), linestyle='--', color='grey', linewidth=0.5)\n", - "ax[1].axvline(x=pd.to_datetime(\"2015-11-05\"), linestyle='--', color='grey', linewidth=0.5)\n", + "ax[1].axvline(\n", + " x=pd.to_datetime(\"2005-05-02\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", + "ax[1].axvline(\n", + " x=pd.to_datetime(\"2013-10-24\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", + "ax[1].axvline(\n", + " x=pd.to_datetime(\"2015-11-05\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", "\n", - "ax[1].plot(stats_exs.iloc[:,9], lw=1, label=\"CBOE\")\n", - "ax[1].plot(stats_exs.iloc[:,4], lw=1, label=\"ISE\")\n", + "ax[1].plot(stats_exs.iloc[:, 9], lw=1, label=\"CBOE\")\n", + "ax[1].plot(stats_exs.iloc[:, 4], lw=1, label=\"ISE\")\n", "ax[1].set_ylabel(\"Coverage\")\n", "ax[0].set_xlabel(None)\n", "\n", "ax[0].set_xlim(pd.to_datetime(\"2005-05-02\"), pd.to_datetime(\"2017-10-31\"))\n", - "ax[0].set_ylim(0,100)\n", - "ax[0].yaxis.set_major_formatter(PercentFormatter(100.0,decimals=2))\n", + "ax[0].set_ylim(0, 100)\n", + "ax[0].yaxis.set_major_formatter(PercentFormatter(100.0, decimals=2))\n", "# ax.xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))\n", - "ax[0].xaxis.set_major_formatter(DateFormatter('%b %Y'))\n", + "ax[0].xaxis.set_major_formatter(DateFormatter(\"%b %Y\"))\n", "\n", - "ax[1].legend(frameon=False, loc = \"lower center\",bbox_to_anchor=(0.5, -0.7), ncols=2)\n", + "ax[1].legend(frameon=False, loc=\"lower center\", bbox_to_anchor=(0.5, -0.7), ncols=2)\n", "\n", "# plt.show()\n", "\n", @@ -387,8 +497,12 @@ }, "outputs": [], "source": [ - "na_over_time_ise = pd.read_parquet(\"gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_all-missing-over-time.parquet\")\n", - "na_over_time_cboe = pd.read_parquet(\"gs://thesis-bucket-option-trade-classification/data/results/cboe_supervised_all-missing-over-time.parquet\")" + 
"na_over_time_ise = pd.read_parquet(\n", + " \"gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_all-missing-over-time.parquet\"\n", + ")\n", + "na_over_time_cboe = pd.read_parquet(\n", + " \"gs://thesis-bucket-option-trade-classification/data/results/cboe_supervised_all-missing-over-time.parquet\"\n", + ")" ] }, { @@ -417,13 +531,24 @@ }, "outputs": [], "source": [ - "filter = ['tick(all)', 'quote(best)','quote(best)->quote(ex)->rev_tick(all)','trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)']\n", + "filter = [\n", + " \"tick(all)\",\n", + " \"quote(best)\",\n", + " \"quote(best)->quote(ex)->rev_tick(all)\",\n", + " \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all)\",\n", + "]\n", "\n", - "fig, axes = plt.subplots(2,1,figsize=(14*CM, 9*CM), sharex=True, sharey=True)\n", + "fig, axes = plt.subplots(2, 1, figsize=(14 * CM, 9 * CM), sharex=True, sharey=True)\n", "\n", - "axes[0].axvline(x=pd.to_datetime(\"2015-06-15\"), linestyle='--', color='grey', linewidth=0.5)\n", - "axes[0].axvline(x=pd.to_datetime(\"2016-10-12\"), linestyle='--', color='grey', linewidth=0.5)\n", - "axes[1].axvline(x=pd.to_datetime(\"2016-10-12\"), linestyle='--', color='grey', linewidth=0.5)\n", + "axes[0].axvline(\n", + " x=pd.to_datetime(\"2015-06-15\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", + "axes[0].axvline(\n", + " x=pd.to_datetime(\"2016-10-12\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", + "axes[1].axvline(\n", + " x=pd.to_datetime(\"2016-10-12\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", "\n", "\n", "axes[0].plot(1 - na_over_time_ise[filter], lw=1)\n", @@ -437,15 +562,27 @@ "axes[1].set_ylabel(\"Coverage\")\n", "axes[0].set_xlabel(None)\n", "# ax.set_ylim(45, 100)\n", - "axes[0].yaxis.set_major_formatter(PercentFormatter(1.0,decimals=2))\n", + "axes[0].yaxis.set_major_formatter(PercentFormatter(1.0, decimals=2))\n", "# ax.xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))\n", - "axes[0].xaxis.set_major_formatter(DateFormatter('%b %Y'))\n", + "axes[0].xaxis.set_major_formatter(DateFormatter(\"%b %Y\"))\n", "\n", "ylim = axes[0].get_ylim()\n", "\n", "axes[0].set_xlim(pd.to_datetime(\"2005-05-02\"), pd.to_datetime(\"2017-10-31\"))\n", "\n", - "plt.legend([\"_\",\"$\\operatorname{tick}_{\\mathrm{all}}$\", \"$\\operatorname{quote}_{\\mathrm{nbbo}}$\",\"$\\operatorname{gsu}_{\\mathrm{small}}$\",\"$\\operatorname{gsu}_{\\mathrm{large}}$\"],frameon=False, loc = \"lower center\", bbox_to_anchor=(0.5, -0.5), ncols=4)\n", + "plt.legend(\n", + " [\n", + " \"_\",\n", + " r\"$\\operatorname{tick}_{\\mathrm{all}}$\",\n", + " r\"$\\operatorname{quote}_{\\mathrm{nbbo}}$\",\n", + " r\"$\\operatorname{gsu}_{\\mathrm{small}}$\",\n", + " r\"$\\operatorname{gsu}_{\\mathrm{large}}$\",\n", + " ],\n", + " frameon=False,\n", + " loc=\"lower center\",\n", + " bbox_to_anchor=(0.5, -0.5),\n", + " ncols=4,\n", + ")\n", "\n", "plt.tight_layout()\n", "\n", @@ -460,7 +597,7 @@ }, "outputs": [], "source": [ - "stats_exs['quote_best_mid_na'].mean()" + "stats_exs[\"quote_best_mid_na\"].mean()" ] }, { @@ -471,14 +608,18 @@ }, "outputs": [], "source": [ - "fig, axes = plt.subplots(1,figsize=(14*CM, 6*CM), sharex=True, sharey=True)\n", + "fig, axes = plt.subplots(1, figsize=(14 * CM, 6 * CM), sharex=True, sharey=True)\n", "\n", - "axes.axvline(x=pd.to_datetime(\"2015-06-15\"), linestyle='--', color='grey', linewidth=0.5)\n", - "axes.axvline(x=pd.to_datetime(\"2016-10-12\"), linestyle='--', 
color='grey', linewidth=0.5)\n", + "axes.axvline(\n", + " x=pd.to_datetime(\"2015-06-15\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", + "axes.axvline(\n", + " x=pd.to_datetime(\"2016-10-12\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", "# axes[1].axvline(x=pd.to_datetime(\"2016-10-12\"), linestyle='--', color='grey', linewidth=0.5)\n", "\n", - "axes.plot(stats_exs.iloc[:,[4,10]], lw=1)\n", - "#axes[1].plot(stats_exs.iloc[:,[5,6,7,8]], lw=1)\n", + "axes.plot(stats_exs.iloc[:, [4, 10]], lw=1)\n", + "# axes[1].plot(stats_exs.iloc[:,[5,6,7,8]], lw=1)\n", "\n", "# axes[0].set_title(\"ISE\")\n", "# axes[1].set_title(\"CBOE\")\n", @@ -488,14 +629,16 @@ "axes.invert_yaxis()\n", "\n", "axes.set_xlim(pd.to_datetime(\"2005-05-02\"), pd.to_datetime(\"2017-10-31\"))\n", - "axes.set_ylim([100-lim*100 for lim in ylim])\n", - "axes.yaxis.set_major_formatter(PercentFormatter(100.0,decimals=2))\n", - "axes.xaxis.set_major_formatter(DateFormatter('%b %Y'))\n", + "axes.set_ylim([100 - lim * 100 for lim in ylim])\n", + "axes.yaxis.set_major_formatter(PercentFormatter(100.0, decimals=2))\n", + "axes.xaxis.set_major_formatter(DateFormatter(\"%b %Y\"))\n", "axes.set_ylabel(\"Percentage\")\n", "\n", - "labels = [\"_\",\"_\",\"At Mid (ISE)\", \"At Mid (CBOE)\"]\n", + "labels = [\"_\", \"_\", \"At Mid (ISE)\", \"At Mid (CBOE)\"]\n", "\n", - "plt.legend(labels, frameon=False, loc = \"lower center\",bbox_to_anchor=(0.5, -0.5), ncols=2)\n", + "plt.legend(\n", + " labels, frameon=False, loc=\"lower center\", bbox_to_anchor=(0.5, -0.5), ncols=2\n", + ")\n", "\n", "plt.tight_layout()\n", "plt.savefig(\"../reports/Graphs/classical_at_mid_over_time.pdf\", bbox_inches=\"tight\")" @@ -509,25 +652,39 @@ }, "outputs": [], "source": [ - "fig, axes = plt.subplots(2,1,figsize=(14*CM, 9*CM), sharex=True, sharey=True)\n", + "fig, axes = plt.subplots(2, 1, figsize=(14 * CM, 9 * CM), sharex=True, sharey=True)\n", "\n", - "axes[0].axvline(x=pd.to_datetime(\"2015-06-15\"), linestyle='--', color='grey', linewidth=0.5)\n", - "axes[0].axvline(x=pd.to_datetime(\"2016-10-12\"), linestyle='--', color='grey', linewidth=0.5)\n", - "axes[1].axvline(x=pd.to_datetime(\"2016-10-12\"), linestyle='--', color='grey', linewidth=0.5)\n", + "axes[0].axvline(\n", + " x=pd.to_datetime(\"2015-06-15\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", + "axes[0].axvline(\n", + " x=pd.to_datetime(\"2016-10-12\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", + "axes[1].axvline(\n", + " x=pd.to_datetime(\"2016-10-12\"), linestyle=\"--\", color=\"grey\", linewidth=0.5\n", + ")\n", "\n", - "axes[0].plot(stats_exs.iloc[:,[0,1,2,3]], lw=1)\n", - "axes[1].plot(stats_exs.iloc[:,[5,6,7,8]], lw=1)\n", + "axes[0].plot(stats_exs.iloc[:, [0, 1, 2, 3]], lw=1)\n", + "axes[1].plot(stats_exs.iloc[:, [5, 6, 7, 8]], lw=1)\n", "\n", "axes[0].set_title(\"ISE\")\n", "axes[1].set_title(\"CBOE\")\n", "\n", "axes[0].set_xlim(pd.to_datetime(\"2005-05-02\"), pd.to_datetime(\"2017-10-31\"))\n", - "axes[0].yaxis.set_major_formatter(PercentFormatter(100.0,decimals=2))\n", - "axes[0].xaxis.set_major_formatter(DateFormatter('%b %Y'))\n", - "\n", - "labels = [\"_\",\"$\\operatorname{tick}_{\\mathrm{ex}}$\", \"$\\operatorname{tick}_{\\mathrm{all}}$\", \"$\\operatorname{quote}_{\\mathrm{nbbo}}$\", \"$\\operatorname{quote}_{\\mathrm{ex}}$\"]\n", + "axes[0].yaxis.set_major_formatter(PercentFormatter(100.0, decimals=2))\n", + "axes[0].xaxis.set_major_formatter(DateFormatter(\"%b %Y\"))\n", + "\n", + "labels = [\n", + " \"_\",\n", + 
" r\"$\\operatorname{tick}_{\\mathrm{ex}}$\",\n", + " r\"$\\operatorname{tick}_{\\mathrm{all}}$\",\n", + " r\"$\\operatorname{quote}_{\\mathrm{nbbo}}$\",\n", + " r\"$\\operatorname{quote}_{\\mathrm{ex}}$\",\n", + "]\n", "\n", - "plt.legend(labels, frameon=False, loc = \"lower center\",bbox_to_anchor=(0.5, -0.7), ncols=4)\n", + "plt.legend(\n", + " labels, frameon=False, loc=\"lower center\", bbox_to_anchor=(0.5, -0.7), ncols=4\n", + ")\n", "\n", "plt.tight_layout()\n", "plt.savefig(\"../reports/Graphs/not_applicable_over_time.pdf\", bbox_inches=\"tight\")" @@ -574,22 +731,22 @@ "outputs": [], "source": [ "# 100 linearly spaced numbers\n", - "x = np.linspace(-2,2,100)\n", + "x = np.linspace(-2, 2, 100)\n", "# the function, which is y = x^2 here\n", - "y = np.log(1 + np.exp(-2*x))\n", + "y = np.log(1 + np.exp(-2 * x))\n", "\n", "# setting the axes at the centre\n", "fig = plt.figure(figsize=(12 * CM, 6 * CM))\n", "ax = fig.add_subplot(1, 1, 1)\n", "\n", - "ax.xaxis.set_ticks_position('bottom')\n", - "ax.yaxis.set_ticks_position('left')\n", + "ax.xaxis.set_ticks_position(\"bottom\")\n", + "ax.yaxis.set_ticks_position(\"left\")\n", "\n", - "ax.set_xlabel(\"Margin $y-F_m(\\mathbf{x})$\")\n", + "ax.set_xlabel(r\"Margin $y-F_m(\\mathbf{x})$\")\n", "ax.set_ylabel(\"Loss\")\n", "\n", "# plot the function\n", - "plt.plot(x,y, label=\"cross-entropy loss\")\n", + "plt.plot(x, y, label=\"cross-entropy loss\")\n", "plt.legend(frameon=False)\n", "plt.savefig(\"../reports/Graphs/cross-entropy-loss.pdf\", bbox_inches=\"tight\")\n", "# show the p" @@ -633,8 +790,10 @@ "# y_2 = regr_2.predict(X_test)\n", "\n", "# Plot the results\n", - "plt.figure(figsize=(8*CM,6*CM))\n", - "plt.scatter(X, y, s=20, c=\"yellowgreen\", edgecolors=\"black\", linewidth=0.5, label=\"Data\")\n", + "plt.figure(figsize=(8 * CM, 6 * CM))\n", + "plt.scatter(\n", + " X, y, s=20, c=\"yellowgreen\", edgecolors=\"black\", linewidth=0.5, label=\"Data\"\n", + ")\n", "plt.plot(X_test, y_1, color=\"cornflowerblue\", label=\"Approximation\", linewidth=1)\n", "# plt.plot(X_test, y_2, color=\"yellowgreen\", label=\"max_depth=5\", linewidth=2)\n", "plt.xlabel(\"Feature\")\n", @@ -667,7 +826,7 @@ "def to_mpl(start: str, end: str):\n", " mpl_start = mdates.date2num(pd.to_datetime(start))\n", " mpl_end = mdates.date2num(pd.to_datetime(end))\n", - " return mpl_start, mpl_end - mpl_start\n" + " return mpl_start, mpl_end - mpl_start" ] }, { @@ -680,7 +839,7 @@ "outputs": [], "source": [ "def to_pos(span: tuple):\n", - " return span[0] + 0.5 * span[1]\n" + " return span[0] + 0.5 * span[1]" ] }, { @@ -701,7 +860,13 @@ "\n", "# ise pretraining\n", "span = [to_mpl(\"2013-04-23\", \"2013-10-24\")]\n", - "ax.broken_barh(span, (2.5, 1), facecolors=(168/255,209/255,238/255), edgecolor=\"black\", linewidth=0.8)\n", + "ax.broken_barh(\n", + " span,\n", + " (2.5, 1),\n", + " facecolors=(168 / 255, 209 / 255, 238 / 255),\n", + " edgecolor=\"black\",\n", + " linewidth=0.8,\n", + ")\n", "\n", "# ax.text(\n", "# x=to_pos(span[0]),\n", @@ -713,10 +878,16 @@ "# fontsize=\"small\",\n", "# )\n", "\n", - "arrow_properties = dict(facecolor='black', arrowstyle='->')\n", - "ax.annotate(\"train\", (to_pos(span[0]), 3), xytext=(30, 0),\n", - " textcoords='offset points', ha='center', va='center',\n", - " arrowprops=arrow_properties)\n", + "arrow_properties = dict(facecolor=\"black\", arrowstyle=\"->\")\n", + "ax.annotate(\n", + " \"train\",\n", + " (to_pos(span[0]), 3),\n", + " xytext=(30, 0),\n", + " textcoords=\"offset points\",\n", + " ha=\"center\",\n", + " 
va=\"center\",\n", + " arrowprops=arrow_properties,\n", + ")\n", "\n", "\n", "spans = [\n", @@ -729,7 +900,11 @@ "ax.broken_barh(\n", " spans,\n", " (1.2, 1),\n", - " facecolors=((168/255,209/255,238/255), (204/255,212/255,151/255), (239/255,171/255,170/255)),\n", + " facecolors=(\n", + " (168 / 255, 209 / 255, 238 / 255),\n", + " (204 / 255, 212 / 255, 151 / 255),\n", + " (239 / 255, 171 / 255, 170 / 255),\n", + " ),\n", " edgecolor=\"black\",\n", " linewidth=0.8,\n", ")\n", @@ -761,7 +936,7 @@ "bx.broken_barh(\n", " spans,\n", " (1.85, 1),\n", - " facecolors=(239/255,171/255,170/255),\n", + " facecolors=(239 / 255, 171 / 255, 170 / 255),\n", " edgecolor=\"black\",\n", " linewidth=0.8,\n", ")\n", @@ -780,7 +955,6 @@ " )\n", "\n", "\n", - "\n", "# Modify y-axis tick labels\n", "ax.set_yticks([1.7, 3], labels=[\"ISE\\n Labeled\", \"ISE\\n Unlabeled\"])\n", "bx.set_yticks([2.35], labels=[\"CBOE\\n Labeled\"])\n", @@ -794,7 +968,7 @@ "plt.xlabel(\"Date\")\n", "\n", "# plt.show()\n", - "plt.savefig(\"../reports/Graphs/train-test-split.pdf\", bbox_inches=\"tight\")\n" + "plt.savefig(\"../reports/Graphs/train-test-split.pdf\", bbox_inches=\"tight\")" ] }, { @@ -830,7 +1004,7 @@ " if titles:\n", " ax.set_title(titles[j])\n", " fig.colorbar(pcm, ax=axes)\n", - " plt.savefig(\"../reports/Graphs/attention-maps.pdf\", bbox_inches=\"tight\")\n" + " plt.savefig(\"../reports/Graphs/attention-maps.pdf\", bbox_inches=\"tight\")" ] }, { @@ -845,7 +1019,7 @@ "attention_weights = torch.rand(size=(2, 4, 10, 10))\n", "show_heatmaps(\n", " attention_weights, xlabel=\"Keys\", ylabel=\"Queries\", figsize=(12 * CM, 6 * CM)\n", - ")\n" + ")" ] }, { @@ -875,7 +1049,7 @@ "\n", " pos_encoding = angle_rads[np.newaxis, ...]\n", "\n", - " return pos_encoding\n" + " return pos_encoding" ] }, { @@ -900,7 +1074,7 @@ "plt.ylim((tokens, 0))\n", "plt.ylabel(\"token position $t$\")\n", "plt.colorbar()\n", - "plt.savefig(\"../reports/Graphs/positional-encoding.pdf\", bbox_inches=\"tight\")\n" + "plt.savefig(\"../reports/Graphs/positional-encoding.pdf\", bbox_inches=\"tight\")" ] }, { @@ -927,13 +1101,10 @@ "\n", "# import numpy as np\n", "# import matplotlib.pyplot as plt\n", + "from matplotlib.ticker import MaxNLocator # needed for integer only on axis\n", "from sklearn import datasets\n", - "from sklearn.svm import SVC\n", - "from sklearn.semi_supervised import LabelSpreading\n", "from sklearn.semi_supervised import SelfTrainingClassifier\n", - "\n", - "from matplotlib.ticker import MaxNLocator # needed for integer only on axis\n", - "from matplotlib.lines import Line2D # for creating the custom legend\n", + "from sklearn.svm import SVC\n", "\n", "iris = datasets.load_iris()\n", "\n", @@ -982,46 +1153,50 @@ "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", "xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n", "\n", - "PROB_DOT_SCALE = 40 # modifier to scale the probability dots\n", - "PROB_DOT_SCALE_POWER = 3 # exponential used to increase/decrease size of prob dots\n", - "TRUE_DOT_SIZE = 50 #\n", + "PROB_DOT_SCALE = 40 # modifier to scale the probability dots\n", + "PROB_DOT_SCALE_POWER = 3 # exponential used to increase/decrease size of prob dots\n", + "TRUE_DOT_SIZE = 50 #\n", "\n", - "redish = '#d73027'\n", - "orangeish = '#fc8d59'\n", - "yellowish = '#fee090'\n", - "blueish = '#4575b4'\n", - "colormap = np.array([redish,blueish,orangeish])\n", + "redish = \"#d73027\"\n", + "orangeish = \"#fc8d59\"\n", + "yellowish = \"#fee090\"\n", + "blueish = \"#4575b4\"\n", + "colormap = 
np.array([redish, blueish, orangeish])\n", "\n", "color_map = {-1: (1, 1, 1), 0: (0, 0, 0.9), 1: (1, 0, 0), 2: (0.8, 0.6, 0)}\n", "\n", - "ax = plt.figure(figsize=(12*CM, 6*CM))\n", + "ax = plt.figure(figsize=(12 * CM, 6 * CM))\n", "\n", "classifiers = (rbf_svc, st30)\n", "for i, (clf, y_train, title) in enumerate(classifiers):\n", " # Plot the decision boundary. For that, we will assign a color to each\n", " # point in the mesh [x_min, x_max]x[y_min, y_max].\n", - " plt.subplot(1, 2, i+1)\n", + " plt.subplot(1, 2, i + 1)\n", " Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n", - " \n", - " z_proba = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])\n", + "\n", + " z_proba = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])\n", " # the size of each probability dot\n", - " Z_size = np.max(Z_proba, axis=1) \n", - " \n", + " Z_size = np.max(Z_proba, axis=1)\n", + "\n", " Z = Z.reshape(xx.shape)\n", - " \n", - " tri = plt.tricontourf(xx.flatten(), yy.flatten(), z_proba[:,1], levels=14, cmap=\"RdBu_r\")\n", - " plt.contour(xx, yy, z_proba[:,1].reshape(xx.shape), 15, linewidths=0.5, colors=\"k\")\n", - " \n", - " \n", + "\n", + " tri = plt.tricontourf(\n", + " xx.flatten(), yy.flatten(), z_proba[:, 1], levels=14, cmap=\"RdBu_r\"\n", + " )\n", + " plt.contour(xx, yy, z_proba[:, 1].reshape(xx.shape), 15, linewidths=0.5, colors=\"k\")\n", + "\n", " # Plot also the training points\n", " colors = [color_map[y] for y in y_train]\n", "\n", - " \n", - " plt.scatter(X[:, 0], X[:, 1], c=colors, s=20, edgecolors=\"black\", linewidth=0.5, zorder=10)\n", + " plt.scatter(\n", + " X[:, 0], X[:, 1], c=colors, s=20, edgecolors=\"black\", linewidth=0.5, zorder=10\n", + " )\n", "\n", " plt.title(title, y=-0.3)\n", "\n", - "plt.savefig(\"../reports/Graphs/semi-supervised-decision-boundary.pdf\", bbox_inches=\"tight\")\n", + "plt.savefig(\n", + " \"../reports/Graphs/semi-supervised-decision-boundary.pdf\", bbox_inches=\"tight\"\n", + ")\n", "# plt.suptitle(\"Unlabeled points are colored white\", y=0.1)\n", "plt.show()" ] @@ -1047,13 +1222,13 @@ "# set study globally here\n", "# study = \"1gzk7msy.optuna:v49\" # gbm classical\n", "# study = \"3vntumoi.optuna:v49\" # gbm classical-size\n", - "study = \"2t5zo50f.optuna:v49\" # gbm ml\n", + "study = \"2t5zo50f.optuna:v49\" # gbm ml\n", "\n", "# study = \"37lymmzc.optuna:v49\" # gbm semi-classical\n", "# study = \"1vmti6db.optuna:v49\" # gbm semi classical-size\n", "# study = \"t55nd8r0.optuna:v49\" # gbm semi ml\n", "\n", - "# transformer \n", + "# transformer\n", "# study = \"3jpe46s1.optuna:v9\" # transformer classical\n", "# study = \"1qx3ul4j.optuna:v9\" # transformer classical-size\n", "# study = \"2h81aiow.optuna:v9\" # transformer ml" @@ -1100,7 +1275,7 @@ }, "outputs": [], "source": [ - "file = open(f\"./artifacts/{study_id}.optuna:{version}/{study_id}.optuna\",'rb')\n", + "file = open(f\"./artifacts/{study_id}.optuna:{version}/{study_id}.optuna\", \"rb\")\n", "study = pickle.load(file)\n", "\n", "sampler = study.sampler\n", @@ -1118,7 +1293,20 @@ }, "outputs": [], "source": [ - "LUT_LABELS = {\"Objective Value\": \"Accuracy\", \"bagging_temperature\": \"Bagging Temp.\", \"depth\":\"Depth\", \"l2_leaf_reg\": \"$\\ell_2$ Leaf Reg.\" , \"learning_rate\" : \"$\\eta$\", \"random_strength\": \"Rand. 
Str.\", \"attention_dropout\": \"Att Dropout\", \"d_token\":\"$d_e$\",\"ffn_dropout\":\"FFN Dropout\", \"weight_decay\":\"$\\lambda$\",\"lr\": \"$\\eta$\",\"n_blocks\":\"$L$\"}" + "LUT_LABELS = {\n", + " \"Objective Value\": \"Accuracy\",\n", + " \"bagging_temperature\": \"Bagging Temp.\",\n", + " \"depth\": \"Depth\",\n", + " \"l2_leaf_reg\": r\"$\\ell_2$ Leaf Reg.\",\n", + " \"learning_rate\": r\"$\\eta$\",\n", + " \"random_strength\": \"Rand. Str.\",\n", + " \"attention_dropout\": \"Att Dropout\",\n", + " \"d_token\": \"$d_e$\",\n", + " \"ffn_dropout\": \"FFN Dropout\",\n", + " \"weight_decay\": r\"$\\lambda$\",\n", + " \"lr\": r\"$\\eta$\",\n", + " \"n_blocks\": \"$L$\",\n", + "}" ] }, { @@ -1130,36 +1318,32 @@ }, "outputs": [], "source": [ - "from typing import Callable\n", - "from typing import Dict\n", - "from typing import List\n", - "from typing import Optional\n", - "from typing import Sequence\n", - "from typing import Tuple\n", - "from typing import Union\n", + "from collections.abc import Sequence\n", + "from typing import Callable, Dict, List, Optional, Tuple, Union\n", "\n", "import numpy as np\n", - "\n", - "from optuna._experimental import experimental_func\n", "from optuna._imports import try_import\n", "from optuna.logging import get_logger\n", "from optuna.study import Study\n", "from optuna.trial import FrozenTrial\n", - "from optuna.visualization._contour import _AxisInfo\n", - "from optuna.visualization._contour import _ContourInfo\n", - "from optuna.visualization._contour import _get_contour_info\n", - "from optuna.visualization._contour import _SubContourInfo\n", + "from optuna.visualization._contour import (\n", + " _AxisInfo,\n", + " _ContourInfo,\n", + " _get_contour_info,\n", + " _SubContourInfo,\n", + ")\n", "from optuna.visualization.matplotlib._matplotlib_imports import _imports\n", "\n", - "\n", "with try_import() as _optuna_imports:\n", " import scipy\n", "\n", "if _imports.is_successful():\n", - " from optuna.visualization.matplotlib._matplotlib_imports import Axes\n", - " from optuna.visualization.matplotlib._matplotlib_imports import Colormap\n", - " from optuna.visualization.matplotlib._matplotlib_imports import ContourSet\n", - " from optuna.visualization.matplotlib._matplotlib_imports import plt\n", + " from optuna.visualization.matplotlib._matplotlib_imports import (\n", + " Axes,\n", + " Colormap,\n", + " ContourSet,\n", + " plt,\n", + " )\n", "\n", "_logger = get_logger(__name__)\n", "\n", @@ -1187,7 +1371,6 @@ " those of the Plotly-based :func:`~optuna.visualization.plot_contour`.\n", "\n", " Example:\n", - "\n", " The following code snippet shows how to plot the parameter relationship as contour plot.\n", "\n", " .. 
plot::\n", @@ -1228,15 +1411,12 @@ " The colormap is reversed when the ``target`` argument isn't :obj:`None` or ``direction``\n", " of :class:`~optuna.study.Study` is ``minimize``.\n", " \"\"\"\n", - "\n", " _imports.check()\n", " info = _get_contour_info(study, params, target, target_name)\n", " return _get_contour_plot(info)\n", "\n", "\n", - "\n", "def _get_contour_plot(info: _ContourInfo) -> \"Axes\":\n", - "\n", " sorted_params = info.sorted_params\n", " sub_plot_infos = info.sub_plot_infos\n", " reverse_scale = info.reverse_scale\n", @@ -1258,7 +1438,7 @@ " axcb.set_label(\"Accuracy\")\n", " else:\n", " # Set up the graph style.\n", - " fig, axs = plt.subplots(n_params, n_params, figsize=(15 *CM, 15 *CM))\n", + " fig, axs = plt.subplots(n_params, n_params, figsize=(15 * CM, 15 * CM))\n", " cmap = _set_cmap(reverse_scale)\n", "\n", " # Prepare data and draw contour plots.\n", @@ -1276,7 +1456,7 @@ "\n", " # Set the formatter for the colorbar\n", " axcb.ax.yaxis.set_major_formatter(formatter)\n", - " \n", + "\n", " axcb.set_label(\"Accuracy\")\n", "\n", " return axs\n", @@ -1324,7 +1504,6 @@ " List[Union[int, float]],\n", " List[Union[int, float]],\n", "]:\n", - "\n", " x_values = []\n", " y_values = []\n", " z_values = []\n", @@ -1344,7 +1523,6 @@ " axis: _AxisInfo,\n", " values: Sequence[Union[str, float]],\n", " ) -> Tuple[np.ndarray, List[str], List[int], List[Union[int, float]]]:\n", - "\n", " # Convert categorical values to int.\n", " cat_param_labels = [] # type: List[str]\n", " cat_param_pos = [] # type: List[int]\n", @@ -1359,19 +1537,25 @@ "\n", " # For x and y, create 1-D array of evenly spaced coordinates on linear or log scale.\n", " if axis.is_log:\n", - " ci = np.logspace(np.log10(axis.range[0]), np.log10(axis.range[1]), CONTOUR_POINT_NUM)\n", + " ci = np.logspace(\n", + " np.log10(axis.range[0]), np.log10(axis.range[1]), CONTOUR_POINT_NUM\n", + " )\n", " else:\n", " ci = np.linspace(axis.range[0], axis.range[1], CONTOUR_POINT_NUM)\n", "\n", " return ci, cat_param_labels, cat_param_pos, list(returned_values)\n", "\n", - " xi, cat_param_labels_x, cat_param_pos_x, transformed_x_values = _calculate_axis_data(\n", - " xaxis,\n", - " x_values,\n", + " xi, cat_param_labels_x, cat_param_pos_x, transformed_x_values = (\n", + " _calculate_axis_data(\n", + " xaxis,\n", + " x_values,\n", + " )\n", " )\n", - " yi, cat_param_labels_y, cat_param_pos_y, transformed_y_values = _calculate_axis_data(\n", - " yaxis,\n", - " y_values,\n", + " yi, cat_param_labels_y, cat_param_pos_y, transformed_y_values = (\n", + " _calculate_axis_data(\n", + " yaxis,\n", + " y_values,\n", + " )\n", " )\n", "\n", " # Calculate grid data points.\n", @@ -1379,7 +1563,9 @@ " # Create irregularly spaced map of trial values\n", " # and interpolate it with Plotly's interpolation formulation.\n", " if xaxis.name != yaxis.name:\n", - " zmap = _create_zmap(transformed_x_values, transformed_y_values, z_values, xi, yi)\n", + " zmap = _create_zmap(\n", + " transformed_x_values, transformed_y_values, z_values, xi, yi\n", + " )\n", " zi = _interpolate_zmap(zmap, CONTOUR_POINT_NUM)\n", "\n", " return (\n", @@ -1396,8 +1582,9 @@ " )\n", "\n", "\n", - "def _generate_contour_subplot(info: _SubContourInfo, ax: \"Axes\", cmap: \"Colormap\") -> \"ContourSet\":\n", - "\n", + "def _generate_contour_subplot(\n", + " info: _SubContourInfo, ax: \"Axes\", cmap: \"Colormap\"\n", + ") -> \"ContourSet\":\n", " if len(info.xaxis.indices) < 2 or len(info.yaxis.indices) < 2:\n", " ax.label_outer()\n", " return ax\n", @@ -1408,9 +1595,9 @@ 
" ax.set_xlim(info.xaxis.range[0], info.xaxis.range[1])\n", " ax.set_ylim(info.yaxis.range[0], info.yaxis.range[1])\n", "\n", - " ax.tick_params(axis='both', which='major', labelsize=\"small\")\n", + " ax.tick_params(axis=\"both\", which=\"major\", labelsize=\"small\")\n", " # ax.tick_params(axis='both', which='minor', labelsize=\"x-small\")\n", - " \n", + "\n", " if info.xaxis.name == info.yaxis.name:\n", " ax.label_outer()\n", " return ax\n", @@ -1427,46 +1614,46 @@ " y_values,\n", " z_values,\n", " ) = _calculate_griddata(info.xaxis, info.yaxis, info.z_values)\n", - " \n", + "\n", " # https://stackoverflow.com/a/55929839/5755604\n", " max_value = max(z_values)\n", " order = np.argsort(z_values)\n", - "# print(order)\n", - "# print(np.take(x_values, order))\n", - " \n", - "# print(np.arrange(x_values[order]))\n", - " \n", + " # print(order)\n", + " # print(np.take(x_values, order))\n", + "\n", + " # print(np.arrange(x_values[order]))\n", + "\n", " mask = np.array([z < max_value for z in z_values])\n", " # colors = ['black' if z < max_value else 'white' for z in z_values]\n", - " # marker = [\"o\" if z != max_value else \"x\" for z in z_values]\n", - " #x_values = np.take(x_values, order)\n", - " #y_values = np.take(y_values, order)\n", + " # marker = [\"o\" if z != max_value else \"x\" for z in z_values]\n", + " # x_values = np.take(x_values, order)\n", + " # y_values = np.take(y_values, order)\n", " # colors = np.take(colors, order)\n", - " \n", + "\n", " x_values = np.array(x_values)\n", " y_values = np.array(y_values)\n", - " \n", + "\n", " cs = None\n", " if len(zi) > 0:\n", " # print(info.xaxis.name)\n", " if info.xaxis.is_log:\n", " ax.set_xscale(\"log\")\n", - " ax.tick_params(axis='x', which='major', labelsize=\"xx-small\")\n", + " ax.tick_params(axis=\"x\", which=\"major\", labelsize=\"xx-small\")\n", " if info.yaxis.is_log:\n", " ax.set_yscale(\"log\")\n", - " ax.tick_params(axis='y', which='major', labelsize=\"xx-small\")\n", + " ax.tick_params(axis=\"y\", which=\"major\", labelsize=\"xx-small\")\n", " # if info.xaxis.name in [\"lambda\", \"lr\"]:\n", " # ax.ticklabel_format(style='sci', axis='x')\n", " # if info.xaxis.name in [\"weight_decay\"]:\n", "\n", - " # ax.ticklabel_format(style='sci', axis='x')\n", - " \n", - " # print(\"yes\")\n", - " # ax.xaxis.set_major_formatter(plt.NullFormatter())\n", - " # ax.yaxis.set_major_formatter(plt.NullFormatter())\n", - " # ax.set_xticks([])\n", - " # ax.set_yticks([])\n", - " \n", + " # ax.ticklabel_format(style='sci', axis='x')\n", + "\n", + " # print(\"yes\")\n", + " # ax.xaxis.set_major_formatter(plt.NullFormatter())\n", + " # ax.yaxis.set_major_formatter(plt.NullFormatter())\n", + " # ax.set_xticks([])\n", + " # ax.set_yticks([])\n", + "\n", " if info.xaxis.name != info.yaxis.name:\n", " # Contour the gridded data.\n", " ax.contour(xi, yi, zi, 15, linewidths=0.5, colors=\"k\")\n", @@ -1482,7 +1669,7 @@ " edgecolors=\"grey\",\n", " linewidth=0.5,\n", " # zorder=order,\n", - " )\n", + " )\n", " ax.scatter(\n", " x_values[~mask],\n", " y_values[~mask],\n", @@ -1492,7 +1679,7 @@ " edgecolors=\"grey\",\n", " linewidth=0.5,\n", " zorder=100,\n", - " ) \n", + " )\n", " if info.xaxis.is_cat:\n", " ax.set_xticks(x_cat_param_pos)\n", " ax.set_xticklabels(x_cat_param_label)\n", @@ -1510,7 +1697,6 @@ " xi: np.ndarray,\n", " yi: np.ndarray,\n", ") -> Dict[Tuple[int, int], float]:\n", - "\n", " # Creates z-map from trial values and params.\n", " # z-map is represented by hashmap of coordinate and trial value pairs.\n", " #\n", @@ -1530,8 
+1716,9 @@ " return zmap\n", "\n", "\n", - "def _interpolate_zmap(zmap: Dict[Tuple[int, int], float], contour_plot_num: int) -> np.ndarray:\n", - "\n", + "def _interpolate_zmap(\n", + " zmap: Dict[Tuple[int, int], float], contour_plot_num: int\n", + ") -> np.ndarray:\n", " # Implements interpolation formulation used in Plotly\n", " # to interpolate heatmaps and contour plots\n", " # https://github.com/plotly/plotly.js/blob/95b3bd1bb19d8dc226627442f8f66bce9576def8/src/traces/heatmap/interp2d.js#L15-L20\n", @@ -1562,7 +1749,10 @@ " b[grid_index] = zmap[(x, y)]\n", " else:\n", " for dx, dy in ((-1, 0), (1, 0), (0, -1), (0, 1)):\n", - " if 0 <= x + dx < contour_plot_num and 0 <= y + dy < contour_plot_num:\n", + " if (\n", + " 0 <= x + dx < contour_plot_num\n", + " and 0 <= y + dy < contour_plot_num\n", + " ):\n", " a_data.append(1)\n", " a_row.append(grid_index)\n", " a_col.append(grid_index)\n", @@ -1570,7 +1760,9 @@ " a_row.append(grid_index)\n", " a_col.append(grid_index + dy * contour_plot_num + dx)\n", "\n", - " z = scipy.sparse.linalg.spsolve(scipy.sparse.csc_matrix((a_data, (a_row, a_col))), b)\n", + " z = scipy.sparse.linalg.spsolve(\n", + " scipy.sparse.csc_matrix((a_data, (a_row, a_col))), b\n", + " )\n", "\n", " return z.reshape((contour_plot_num, contour_plot_num))" ] @@ -1585,7 +1777,9 @@ "outputs": [], "source": [ "axes = plot_contour(study)\n", - "plt.savefig(f\"../reports/Graphs/{study_id}-hyperparam-search-space.pdf\", bbox_inches=\"tight\")" + "plt.savefig(\n", + " f\"../reports/Graphs/{study_id}-hyperparam-search-space.pdf\", bbox_inches=\"tight\"\n", + ")" ] }, { @@ -1624,18 +1818,58 @@ }, "outputs": [], "source": [ - "fig, (ax2, ax4, ax3, ax1) = plt.subplots(4, 1, figsize=(12*CM, 15*CM))\n", - "\n", - "loss_train = learning_metrics[[\"default_train_loss\", \"activation_train_loss\", \"lr_scheduler_train_loss\", \"sample_weighting_train_loss\", \"label_smoothing_train_loss\"]].dropna(how=\"any\").reset_index(drop=True).rolling(20).mean()\n", + "fig, (ax2, ax4, ax3, ax1) = plt.subplots(4, 1, figsize=(12 * CM, 15 * CM))\n", + "\n", + "loss_train = (\n", + " learning_metrics[\n", + " [\n", + " \"default_train_loss\",\n", + " \"activation_train_loss\",\n", + " \"lr_scheduler_train_loss\",\n", + " \"sample_weighting_train_loss\",\n", + " \"label_smoothing_train_loss\",\n", + " ]\n", + " ]\n", + " .dropna(how=\"any\")\n", + " .reset_index(drop=True)\n", + " .rolling(20)\n", + " .mean()\n", + ")\n", "\n", - "ax2.plot(loss_train.index,loss_train[\"default_train_loss\"], label=\"Default\", linewidth=1, zorder=100)\n", - "ax2.plot(loss_train.index,loss_train[\"activation_train_loss\"], label=\"Activation\", linewidth=1)\n", - "ax2.plot(loss_train.index,loss_train[\"label_smoothing_train_loss\"], label=\"Label Smoothing\", linewidth=1)\n", - "ax2.plot(loss_train.index,loss_train[\"lr_scheduler_train_loss\"], label=\"Lr Schedule\", linewidth=1)\n", - "ax2.plot(loss_train.index,loss_train[\"sample_weighting_train_loss\"], label=\"Sample Weighting\", linewidth=1)\n", + "ax2.plot(\n", + " loss_train.index,\n", + " loss_train[\"default_train_loss\"],\n", + " label=\"Default\",\n", + " linewidth=1,\n", + " zorder=100,\n", + ")\n", + "ax2.plot(\n", + " loss_train.index,\n", + " loss_train[\"activation_train_loss\"],\n", + " label=\"Activation\",\n", + " linewidth=1,\n", + ")\n", + "ax2.plot(\n", + " loss_train.index,\n", + " loss_train[\"label_smoothing_train_loss\"],\n", + " label=\"Label Smoothing\",\n", + " linewidth=1,\n", + ")\n", + "ax2.plot(\n", + " loss_train.index,\n", + " 
loss_train[\"lr_scheduler_train_loss\"],\n", + " label=\"Lr Schedule\",\n", + " linewidth=1,\n", + ")\n", + "ax2.plot(\n", + " loss_train.index,\n", + " loss_train[\"sample_weighting_train_loss\"],\n", + " label=\"Sample Weighting\",\n", + " linewidth=1,\n", + ")\n", "ax2.set_ylabel(\"Log Loss (Train)\")\n", "\n", - "ax2.xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))\n", + "ax2.xaxis.set_major_formatter(StrMethodFormatter(\"{x:,.0f}\"))\n", "# ax2.set_xlabel(\"Iteration\")\n", "\n", "n_epochs = 10\n", @@ -1643,26 +1877,66 @@ "step_size = int(max(learning_metrics[\"default_train_step\"]) / n_epochs)\n", "\n", "for i in range(step_size, step_size * n_epochs + 1, step_size):\n", - " ax2.axvline(x=i, linestyle='--', color='grey', linewidth=0.5)\n", + " ax2.axvline(x=i, linestyle=\"--\", color=\"grey\", linewidth=0.5)\n", "\n", "ax2.set_xlim(0, step_size * n_epochs - 1)\n", "\n", "\n", "#\n", "\n", - "acc_train = learning_metrics[[\"default_train_accuracy\", \"activation_train_accuracy\", \"lr_scheduler_train_accuracy\", \"sample_weighting_train_accuracy\", \"label_smoothing_train_accuracy\"]].dropna(how=\"any\").reset_index(drop=True).rolling(20).mean()\n", - "ax4.plot(acc_train.index, acc_train[\"default_train_accuracy\"], label=\"Default\", linewidth=1, zorder=100)\n", - "ax4.plot(acc_train.index, acc_train[\"activation_train_accuracy\"], label=\"Activation\", linewidth=1)\n", - "ax4.plot(acc_train.index, acc_train[\"label_smoothing_train_accuracy\"], label=\"Label Smoothing\", linewidth=1)\n", - "ax4.plot(acc_train.index, acc_train[\"lr_scheduler_train_accuracy\"], label=\"Lr Schedule\", linewidth=1)\n", - "ax4.plot(acc_train.index, acc_train[\"sample_weighting_train_accuracy\"], label=\"Sample Weighting\", linewidth=1)\n", + "acc_train = (\n", + " learning_metrics[\n", + " [\n", + " \"default_train_accuracy\",\n", + " \"activation_train_accuracy\",\n", + " \"lr_scheduler_train_accuracy\",\n", + " \"sample_weighting_train_accuracy\",\n", + " \"label_smoothing_train_accuracy\",\n", + " ]\n", + " ]\n", + " .dropna(how=\"any\")\n", + " .reset_index(drop=True)\n", + " .rolling(20)\n", + " .mean()\n", + ")\n", + "ax4.plot(\n", + " acc_train.index,\n", + " acc_train[\"default_train_accuracy\"],\n", + " label=\"Default\",\n", + " linewidth=1,\n", + " zorder=100,\n", + ")\n", + "ax4.plot(\n", + " acc_train.index,\n", + " acc_train[\"activation_train_accuracy\"],\n", + " label=\"Activation\",\n", + " linewidth=1,\n", + ")\n", + "ax4.plot(\n", + " acc_train.index,\n", + " acc_train[\"label_smoothing_train_accuracy\"],\n", + " label=\"Label Smoothing\",\n", + " linewidth=1,\n", + ")\n", + "ax4.plot(\n", + " acc_train.index,\n", + " acc_train[\"lr_scheduler_train_accuracy\"],\n", + " label=\"Lr Schedule\",\n", + " linewidth=1,\n", + ")\n", + "ax4.plot(\n", + " acc_train.index,\n", + " acc_train[\"sample_weighting_train_accuracy\"],\n", + " label=\"Sample Weighting\",\n", + " linewidth=1,\n", + ")\n", "\n", "n_epochs = 10\n", "\n", "step_size = int(len(acc_train) / n_epochs)\n", "\n", "for i in range(step_size, step_size * n_epochs + 1, step_size):\n", - " ax4.axvline(x=i, linestyle='--', color='grey', linewidth=0.5)\n", + " ax4.axvline(x=i, linestyle=\"--\", color=\"grey\", linewidth=0.5)\n", "\n", "ax4.set_xlim(0, step_size * n_epochs - 1)\n", "ax4.set_ylim(0.67, 0.80)\n", @@ -1670,17 +1944,45 @@ "\n", "ax4.set_ylabel(\"Accuracy (Train)\")\n", "ax4.set_xlabel(\"Iteration\")\n", - "ax4.yaxis.set_major_formatter(PercentFormatter(1.0,decimals=2))\n", - 
"ax4.xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))\n", - "\n", - "loss_val = learning_metrics.groupby(\"default_epoch\")[[\"default_val_loss\", \"activation_val_loss\", \"lr_scheduler_val_loss\", \"sample_weighting_val_loss\", \"label_smoothing_val_loss\"]].mean()\n", - "\n", - "\n", - "ax3.plot(loss_val.index,loss_val[\"default_val_loss\"], label=\"Default\", linewidth=1, zorder=100)\n", - "ax3.plot(loss_val.index,loss_val[\"activation_val_loss\"], label=\"Activation\", linewidth=1)\n", - "ax3.plot(loss_val.index,loss_val[\"label_smoothing_val_loss\"], label=\"Label Smoothing\", linewidth=1)\n", - "ax3.plot(loss_val.index,loss_val[\"lr_scheduler_val_loss\"], label=\"Lr Schedule\", linewidth=1)\n", - "ax3.plot(loss_val.index,loss_val[\"sample_weighting_val_loss\"], label=\"Sample Weighting\", linewidth=1)\n", + "ax4.yaxis.set_major_formatter(PercentFormatter(1.0, decimals=2))\n", + "ax4.xaxis.set_major_formatter(StrMethodFormatter(\"{x:,.0f}\"))\n", + "\n", + "loss_val = learning_metrics.groupby(\"default_epoch\")[\n", + " [\n", + " \"default_val_loss\",\n", + " \"activation_val_loss\",\n", + " \"lr_scheduler_val_loss\",\n", + " \"sample_weighting_val_loss\",\n", + " \"label_smoothing_val_loss\",\n", + " ]\n", + "].mean()\n", + "\n", + "\n", + "ax3.plot(\n", + " loss_val.index,\n", + " loss_val[\"default_val_loss\"],\n", + " label=\"Default\",\n", + " linewidth=1,\n", + " zorder=100,\n", + ")\n", + "ax3.plot(\n", + " loss_val.index, loss_val[\"activation_val_loss\"], label=\"Activation\", linewidth=1\n", + ")\n", + "ax3.plot(\n", + " loss_val.index,\n", + " loss_val[\"label_smoothing_val_loss\"],\n", + " label=\"Label Smoothing\",\n", + " linewidth=1,\n", + ")\n", + "ax3.plot(\n", + " loss_val.index, loss_val[\"lr_scheduler_val_loss\"], label=\"Lr Schedule\", linewidth=1\n", + ")\n", + "ax3.plot(\n", + " loss_val.index,\n", + " loss_val[\"sample_weighting_val_loss\"],\n", + " label=\"Sample Weighting\",\n", + " linewidth=1,\n", + ")\n", "ax3.set_ylabel(\"Log Loss (Val)\")\n", "# ax3.set_xlabel(\"Step\")\n", "n_epochs = 10\n", @@ -1688,7 +1990,7 @@ "step_size = int(len(loss_val) / n_epochs)\n", "\n", "for i in range(step_size, step_size * n_epochs + 1, step_size):\n", - " ax3.axvline(x=i, linestyle='--', color='grey', linewidth=0.5)\n", + " ax3.axvline(x=i, linestyle=\"--\", color=\"grey\", linewidth=0.5)\n", "\n", "# ax.set_ylim(0.6, 0.72)\n", "\n", @@ -1696,14 +1998,45 @@ "ax3.set_ylabel(\"Log Loss (Val)\")\n", "ax3.set_xlim(0, step_size * n_epochs - 1)\n", "\n", - "ax3.xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))\n", - "\n", - "acc_val = learning_metrics.groupby(\"default_epoch\")[[\"default_val_accuracy\", \"activation_val_accuracy\", \"lr_scheduler_val_accuracy\", \"sample_weighting_val_accuracy\", \"label_smoothing_val_accuracy\"]].mean()\n", - "ax1.plot(acc_val.index,acc_val[\"default_val_accuracy\"], label=\"Default\", linewidth=1, zorder=100)\n", - "ax1.plot(acc_val.index,acc_val[\"activation_val_accuracy\"], label=\"Activation\", linewidth=1)\n", - "ax1.plot(acc_val.index,acc_val[\"label_smoothing_val_accuracy\"], label=\"Label Smoothing\", linewidth=1)\n", - "ax1.plot(acc_val.index,acc_val[\"lr_scheduler_val_accuracy\"], label=\"Lr Schedule\", linewidth=1)\n", - "ax1.plot(acc_val.index,acc_val[\"sample_weighting_val_accuracy\"], label=\"Sample Weighting\", linewidth=1)\n", + "ax3.xaxis.set_major_formatter(StrMethodFormatter(\"{x:,.0f}\"))\n", + "\n", + "acc_val = learning_metrics.groupby(\"default_epoch\")[\n", + " [\n", + " \"default_val_accuracy\",\n", 
+ " \"activation_val_accuracy\",\n", + " \"lr_scheduler_val_accuracy\",\n", + " \"sample_weighting_val_accuracy\",\n", + " \"label_smoothing_val_accuracy\",\n", + " ]\n", + "].mean()\n", + "ax1.plot(\n", + " acc_val.index,\n", + " acc_val[\"default_val_accuracy\"],\n", + " label=\"Default\",\n", + " linewidth=1,\n", + " zorder=100,\n", + ")\n", + "ax1.plot(\n", + " acc_val.index, acc_val[\"activation_val_accuracy\"], label=\"Activation\", linewidth=1\n", + ")\n", + "ax1.plot(\n", + " acc_val.index,\n", + " acc_val[\"label_smoothing_val_accuracy\"],\n", + " label=\"Label Smoothing\",\n", + " linewidth=1,\n", + ")\n", + "ax1.plot(\n", + " acc_val.index,\n", + " acc_val[\"lr_scheduler_val_accuracy\"],\n", + " label=\"Lr Schedule\",\n", + " linewidth=1,\n", + ")\n", + "ax1.plot(\n", + " acc_val.index,\n", + " acc_val[\"sample_weighting_val_accuracy\"],\n", + " label=\"Sample Weighting\",\n", + " linewidth=1,\n", + ")\n", "ax1.set_ylabel(\"Log Loss (Val)\")\n", "ax1.set_xlabel(\"Epoch\")\n", "n_epochs = 10\n", @@ -1711,22 +2044,31 @@ "step_size = int(len(loss_val) / n_epochs)\n", "\n", "for i in range(step_size, step_size * n_epochs + 1, step_size):\n", - " ax1.axvline(x=i, linestyle='--', color='grey', linewidth=0.5)\n", + " ax1.axvline(x=i, linestyle=\"--\", color=\"grey\", linewidth=0.5)\n", "\n", "ax1.set_xlim(0, step_size * n_epochs - 1)\n", "# ax.set_ylim(0.6, 0.72)\n", "\n", "ax1.set_xlabel(\"Epoch\")\n", "ax1.set_ylabel(\"Accuracy (Val)\")\n", - "ax1.yaxis.set_major_formatter(PercentFormatter(1.0,decimals=2))\n", - "ax1.xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))\n", + "ax1.yaxis.set_major_formatter(PercentFormatter(1.0, decimals=2))\n", + "ax1.xaxis.set_major_formatter(StrMethodFormatter(\"{x:,.0f}\"))\n", "\n", "handles, labels = ax1.get_legend_handles_labels()\n", - "fig.legend(handles, labels, loc='lower center', frameon=False, ncol=3, bbox_to_anchor = (0, -0.07, 1, 1))\n", + "fig.legend(\n", + " handles,\n", + " labels,\n", + " loc=\"lower center\",\n", + " frameon=False,\n", + " ncol=3,\n", + " bbox_to_anchor=(0, -0.07, 1, 1),\n", + ")\n", "\n", "plt.tight_layout()\n", "\n", - "plt.savefig(f\"../reports/Graphs/fttransformer-optimisations-loss-acc.pdf\", bbox_inches=\"tight\")" + "plt.savefig(\n", + " \"../reports/Graphs/fttransformer-optimisations-loss-acc.pdf\", bbox_inches=\"tight\"\n", + ")" ] }, { @@ -1757,17 +2099,19 @@ "outputs": [], "source": [ "# visualize learning curves\n", - "with open(Path(model_dir,model_name[:-4]+\"_training.json\"), 'r') as j:\n", - " contents = json.loads(j.read())\n", - " \n", + "with open(Path(model_dir, model_name[:-4] + \"_training.json\")) as j:\n", + " contents = json.loads(j.read())\n", + "\n", "# extract relevant keys\n", "iterations = contents.get(\"iterations\")\n", - "test_metrics = [d['name'] for d in contents['meta']['test_metrics'] ]\n", - "test_results = [d['test'] for d in iterations]\n", - "learn_metrics = [d['name'] for d in contents['meta']['learn_metrics'] ]\n", - "learn_results = [d['learn'] for d in iterations]\n", + "test_metrics = [d[\"name\"] for d in contents[\"meta\"][\"test_metrics\"]]\n", + "test_results = [d[\"test\"] for d in iterations]\n", + "learn_metrics = [d[\"name\"] for d in contents[\"meta\"][\"learn_metrics\"]]\n", + "learn_results = [d[\"learn\"] for d in iterations]\n", "\n", - "metrics_learn = pd.DataFrame(learn_results, columns=learn_metrics).add_suffix(\" (train)\")\n", + "metrics_learn = pd.DataFrame(learn_results, columns=learn_metrics).add_suffix(\n", + " \" (train)\"\n", + ")\n", 
"metrics_test = pd.DataFrame(test_results, columns=test_metrics).add_suffix(\" (val)\")\n", "\n", "learning_metrics = pd.concat([metrics_learn, metrics_test], axis=1)" @@ -1828,23 +2172,50 @@ }, "outputs": [], "source": [ - "fig, (ax2, ax1) = plt.subplots(2, 1, figsize=(12*CM,7.5*CM), sharex=True)\n", + "fig, (ax2, ax1) = plt.subplots(2, 1, figsize=(12 * CM, 7.5 * CM), sharex=True)\n", "\n", "# plot accuracy\n", - "ax1.plot(learning_metrics.index,learning_metrics[\"default_learn_acc\"], label=\"Train\", linewidth=1)\n", - "ax1.plot(learning_metrics.index,learning_metrics[\"default_val_acc\"], label=\"Val\",linewidth=1)\n", - "ax1.yaxis.set_major_formatter(PercentFormatter(1.0,decimals=2))\n", + "ax1.plot(\n", + " learning_metrics.index,\n", + " learning_metrics[\"default_learn_acc\"],\n", + " label=\"Train\",\n", + " linewidth=1,\n", + ")\n", + "ax1.plot(\n", + " learning_metrics.index,\n", + " learning_metrics[\"default_val_acc\"],\n", + " label=\"Val\",\n", + " linewidth=1,\n", + ")\n", + "ax1.yaxis.set_major_formatter(PercentFormatter(1.0, decimals=2))\n", "ax1.set_ylabel(\"Accuracy\")\n", - "ax1.xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))\n", + "ax1.xaxis.set_major_formatter(StrMethodFormatter(\"{x:,.0f}\"))\n", "ax1.set_xlabel(\"Iterations\")\n", "\n", "# plot log loss\n", - "ax2.plot(learning_metrics.index,learning_metrics[\"default_learn_log\"], label=\"Train\",linewidth=1) \n", - "ax2.plot(learning_metrics.index,learning_metrics[\"default_val_log\"], label=\"Val\",linewidth=1) \n", + "ax2.plot(\n", + " learning_metrics.index,\n", + " learning_metrics[\"default_learn_log\"],\n", + " label=\"Train\",\n", + " linewidth=1,\n", + ")\n", + "ax2.plot(\n", + " learning_metrics.index,\n", + " learning_metrics[\"default_val_log\"],\n", + " label=\"Val\",\n", + " linewidth=1,\n", + ")\n", "ax2.set_ylabel(\"Log Loss\")\n", "\n", "handles, labels = ax1.get_legend_handles_labels()\n", - "fig.legend(handles, labels, loc='lower center', frameon=False, ncol=4, bbox_to_anchor = (0, -0.03, 1, 1))\n", + "fig.legend(\n", + " handles,\n", + " labels,\n", + " loc=\"lower center\",\n", + " frameon=False,\n", + " ncol=4,\n", + " bbox_to_anchor=(0, -0.03, 1, 1),\n", + ")\n", "\n", "plt.tight_layout()\n", "\n", @@ -1864,38 +2235,87 @@ }, "outputs": [], "source": [ - "fig, (ax2, ax1) = plt.subplots(2, 1, figsize=(12*CM,7.5*CM), sharex=True)\n", + "fig, (ax2, ax1) = plt.subplots(2, 1, figsize=(12 * CM, 7.5 * CM), sharex=True)\n", "\n", "# [\"default\", \"depth\", \"early_stopping\", \"border_count\", \"grow_policy\", \"exp_weighting\"]\n", "\n", "# plot accuracy\n", - "ax1.plot(learning_metrics.index,learning_metrics[\"default_val_acc\"], label=\"Default\", linewidth=1, zorder=100)\n", - "ax1.plot(learning_metrics.index,learning_metrics[\"early_stopping_val_acc\"], label=\"Early Stopping\", linewidth=1)\n", - "ax1.plot(learning_metrics.index,learning_metrics[\"grow_policy_val_acc\"], label=\"Grow Policy\", linewidth=1)\n", - "ax1.plot(learning_metrics.index,learning_metrics[\"exp_weighting_val_acc\"], label=\"Sample Weighting\", linewidth=1)\n", + "ax1.plot(\n", + " learning_metrics.index,\n", + " learning_metrics[\"default_val_acc\"],\n", + " label=\"Default\",\n", + " linewidth=1,\n", + " zorder=100,\n", + ")\n", + "ax1.plot(\n", + " learning_metrics.index,\n", + " learning_metrics[\"early_stopping_val_acc\"],\n", + " label=\"Early Stopping\",\n", + " linewidth=1,\n", + ")\n", + "ax1.plot(\n", + " learning_metrics.index,\n", + " learning_metrics[\"grow_policy_val_acc\"],\n", + " label=\"Grow 
Policy\",\n", + " linewidth=1,\n", + ")\n", + "ax1.plot(\n", + " learning_metrics.index,\n", + " learning_metrics[\"exp_weighting_val_acc\"],\n", + " label=\"Sample Weighting\",\n", + " linewidth=1,\n", + ")\n", "\n", - "ax1.yaxis.set_major_formatter(PercentFormatter(1.0,decimals=2))\n", + "ax1.yaxis.set_major_formatter(PercentFormatter(1.0, decimals=2))\n", "ax1.set_ylabel(\"Accuracy (Val)\")\n", - "ax1.xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))\n", - "#ax1.set_ylim(0.57, 0.64)\n", + "ax1.xaxis.set_major_formatter(StrMethodFormatter(\"{x:,.0f}\"))\n", + "# ax1.set_ylim(0.57, 0.64)\n", "ax1.set_xlabel(\"Iterations\")\n", "\n", "# plot log loss\n", - "ax2.plot(learning_metrics.index,learning_metrics[\"default_val_log\"], label=\"Default\", linewidth=1, zorder=100)\n", - "ax2.plot(learning_metrics.index,learning_metrics[\"early_stopping_val_log\"], label=\"Early Stopping\", linewidth=1)\n", - "ax2.plot(learning_metrics.index,learning_metrics[\"grow_policy_val_log\"], label=\"Grow Policy\", linewidth=1)\n", - "ax2.plot(learning_metrics.index,learning_metrics[\"exp_weighting_val_log\"], label=\"Sample Weighting\", linewidth=1)\n", + "ax2.plot(\n", + " learning_metrics.index,\n", + " learning_metrics[\"default_val_log\"],\n", + " label=\"Default\",\n", + " linewidth=1,\n", + " zorder=100,\n", + ")\n", + "ax2.plot(\n", + " learning_metrics.index,\n", + " learning_metrics[\"early_stopping_val_log\"],\n", + " label=\"Early Stopping\",\n", + " linewidth=1,\n", + ")\n", + "ax2.plot(\n", + " learning_metrics.index,\n", + " learning_metrics[\"grow_policy_val_log\"],\n", + " label=\"Grow Policy\",\n", + " linewidth=1,\n", + ")\n", + "ax2.plot(\n", + " learning_metrics.index,\n", + " learning_metrics[\"exp_weighting_val_log\"],\n", + " label=\"Sample Weighting\",\n", + " linewidth=1,\n", + ")\n", "ax2.set_ylabel(\"Log Loss (Val)\")\n", "# ax2.set_ylim(0.55, 0.7)\n", "\n", "plt.tight_layout()\n", "\n", "handles, labels = ax2.get_legend_handles_labels()\n", - "fig.legend(handles, labels, loc='lower center', frameon=False, ncol=4, bbox_to_anchor = (0, -0.03, 1, 1))\n", + "fig.legend(\n", + " handles,\n", + " labels,\n", + " loc=\"lower center\",\n", + " frameon=False,\n", + " ncol=4,\n", + " bbox_to_anchor=(0, -0.03, 1, 1),\n", + ")\n", "\n", "plt.tight_layout()\n", "\n", - "plt.savefig(f\"../reports/Graphs/gbm-optimisations-loss-acc.pdf\", bbox_inches=\"tight\")" + "plt.savefig(\"../reports/Graphs/gbm-optimisations-loss-acc.pdf\", bbox_inches=\"tight\")" ] }, { @@ -1916,8 +2336,7 @@ "outputs": [], "source": [ "import numpy as np\n", - "from torch import optim\n", - "from torch import nn\n", + "from torch import nn, optim\n", "\n", "\n", "class CosineWarmupScheduler(optim.lr_scheduler._LRScheduler):\n", @@ -1934,7 +2353,7 @@ " lr_factor = 0.5 * (1 + np.cos(np.pi * epoch / self.max_num_iters))\n", " if epoch <= self.warmup:\n", " lr_factor *= epoch * 1.0 / self.warmup\n", - " return lr_factor\n" + " return lr_factor" ] }, { @@ -1984,7 +2403,7 @@ "source": [ "lr = 1e-3\n", "\n", - "factor = [scheduler.get_lr_factor(i) * lr for i in range(0, max_iters)]\n", + "factor = [scheduler.get_lr_factor(i) * lr for i in range(max_iters)]\n", "\n", "fig = plt.figure(figsize=(12 * CM, 3.5 * CM))\n", "\n", @@ -1992,7 +2411,7 @@ "plt.xlabel(\"Iteration\")\n", "plt.ylabel(\"Learning Rate\")\n", "\n", - "plt.savefig(f\"lr-lin-warmup-cosine-decay.pdf\", bbox_inches=\"tight\")" + "plt.savefig(\"lr-lin-warmup-cosine-decay.pdf\", bbox_inches=\"tight\")" ] }, { @@ -2015,12 +2434,21 @@ "# TODO: replace with 
versioned results\n", "sample_size = 256\n", "\n", - "fi_classical = pd.read_parquet(f\"gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_test_classical_feature_importance_{sample_size}.parquet\")\n", - "fi_gbm = pd.read_parquet(f\"gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_test_gbm_feature_importance_{sample_size}.parquet\")\n", - "fi_transformer = pd.read_parquet(f\"gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_test_fttransformer_feature_importance_{sample_size}.parquet\")\n", + "fi_classical = pd.read_parquet(\n", + " f\"gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_test_classical_feature_importance_{sample_size}.parquet\"\n", + ")\n", + "fi_gbm = pd.read_parquet(\n", + " f\"gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_test_gbm_feature_importance_{sample_size}.parquet\"\n", + ")\n", + "fi_transformer = pd.read_parquet(\n", + " f\"gs://thesis-bucket-option-trade-classification/data/results/ise_supervised_test_fttransformer_feature_importance_{sample_size}.parquet\"\n", + ")\n", "\n", "# set features to nan that are not part of dataset\n", - "fi_classical.loc[[\"size_ex (grouped)\", \"TRADE_SIZE\"],[\"quote(best)->quote(ex) values\",\"quote(best)->quote(ex) std\"]] = np.NaN" + "fi_classical.loc[\n", + " [\"size_ex (grouped)\", \"TRADE_SIZE\"],\n", + " [\"quote(best)->quote(ex) values\", \"quote(best)->quote(ex) std\"],\n", + "] = np.nan" ] }, { @@ -2053,39 +2481,118 @@ }, "outputs": [], "source": [ - "fig, axes = plt.subplots(1, 3, figsize=(18*CM, 12*CM), sharex=False, sharey=True)\n", + "fig, axes = plt.subplots(1, 3, figsize=(18 * CM, 12 * CM), sharex=False, sharey=True)\n", "\n", "# adapted from here: https://stackoverflow.com/a/15214551/5755604\n", "ind = np.arange(len(fi))\n", "width = 0.25\n", "\n", - "semi = \"\"#\"semi-\"\n", + "semi = \"\" # \"semi-\"\n", "\n", - "axes[0].barh(ind, fi[\"quote(best)->quote(ex)->rev_tick(all) values\"], width, xerr=fi[\"quote(best)->quote(ex)->rev_tick(all) std\"], label=f\"GSU\")\n", - "axes[0].barh(ind+width, fi[f\"gbm({semi}classical) values\"], width, xerr=fi[f\"gbm({semi}classical) std\"], label=f\"{semi}GBRT\")\n", - "axes[0].barh(ind+width + width, fi[f\"fttransformer({semi}classical) values\"], width, xerr=fi[f\"fttransformer({semi}classical) std\"], label=f\"{semi}Transformer\")\n", + "axes[0].barh(\n", + " ind,\n", + " fi[\"quote(best)->quote(ex)->rev_tick(all) values\"],\n", + " width,\n", + " xerr=fi[\"quote(best)->quote(ex)->rev_tick(all) std\"],\n", + " label=\"GSU\",\n", + ")\n", + "axes[0].barh(\n", + " ind + width,\n", + " fi[f\"gbm({semi}classical) values\"],\n", + " width,\n", + " xerr=fi[f\"gbm({semi}classical) std\"],\n", + " label=f\"{semi}GBRT\",\n", + ")\n", + "axes[0].barh(\n", + " ind + width + width,\n", + " fi[f\"fttransformer({semi}classical) values\"],\n", + " width,\n", + " xerr=fi[f\"fttransformer({semi}classical) std\"],\n", + " label=f\"{semi}Transformer\",\n", + ")\n", "# axes[0].barh(ind+width + width + width, fi[\"fttransformer(semi-classical) values\"], width, xerr=fi[\"fttransformer(semi-classical) std\"], label=\"Transformer (Pre-Train)\")\n", - "axes[0].axvline(0, color='black', linestyle='--', linewidth=0.5)\n", + "axes[0].axvline(0, color=\"black\", linestyle=\"--\", linewidth=0.5)\n", "axes[0].set_xlim([-0.15, 0.15])\n", "\n", - "axes[1].barh(ind, fi[\"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all) values\"], width, 
xerr=fi[\"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all) std\"], label=\"GSU\")\n", - "axes[1].barh(ind+width, fi[f\"gbm({semi}classical-size) values\"], width, xerr=fi[f\"gbm({semi}classical-size) std\"], label=f\"{semi}GBRT\")\n", - "axes[1].barh(ind+width + width, fi[f\"fttransformer({semi}classical-size) values\"], width, xerr=fi[f\"fttransformer({semi}classical-size) std\"], label=f\"{semi}Transformer\")\n", + "axes[1].barh(\n", + " ind,\n", + " fi[\n", + " \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all) values\"\n", + " ],\n", + " width,\n", + " xerr=fi[\n", + " \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all) std\"\n", + " ],\n", + " label=\"GSU\",\n", + ")\n", + "axes[1].barh(\n", + " ind + width,\n", + " fi[f\"gbm({semi}classical-size) values\"],\n", + " width,\n", + " xerr=fi[f\"gbm({semi}classical-size) std\"],\n", + " label=f\"{semi}GBRT\",\n", + ")\n", + "axes[1].barh(\n", + " ind + width + width,\n", + " fi[f\"fttransformer({semi}classical-size) values\"],\n", + " width,\n", + " xerr=fi[f\"fttransformer({semi}classical-size) std\"],\n", + " label=f\"{semi}Transformer\",\n", + ")\n", "# axes[1].barh(ind+width + width + width, fi[\"fttransformer(semi-classical-size) values\"], width, xerr=fi[\"fttransformer(semi-classical-size) std\"], label=\"Transformer (Pre-Train)\")\n", - "axes[1].axvline(0, color='black', linestyle='--', linewidth=0.5)\n", + "axes[1].axvline(0, color=\"black\", linestyle=\"--\", linewidth=0.5)\n", "axes[1].set_xlim([-0.15, 0.15])\n", "\n", - "axes[2].barh(ind, fi[\"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all) values\"], width, xerr=fi[\"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all) std\"], label=\"GSU\")\n", - "axes[2].barh(ind+width, fi[f\"gbm({semi}ml) values\"], width, xerr=fi[f\"gbm({semi}ml) std\"], label=f\"{semi}GBRT\")\n", - "axes[2].barh(ind+width + width, fi[f\"fttransformer({semi}ml) values\"], width, xerr=fi[f\"fttransformer({semi}ml) std\"], label=f\"{semi}Transformer\")\n", + "axes[2].barh(\n", + " ind,\n", + " fi[\n", + " \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all) values\"\n", + " ],\n", + " width,\n", + " xerr=fi[\n", + " \"trade_size(ex)->quote(best)->quote(ex)->depth(best)->depth(ex)->rev_tick(all) std\"\n", + " ],\n", + " label=\"GSU\",\n", + ")\n", + "axes[2].barh(\n", + " ind + width,\n", + " fi[f\"gbm({semi}ml) values\"],\n", + " width,\n", + " xerr=fi[f\"gbm({semi}ml) std\"],\n", + " label=f\"{semi}GBRT\",\n", + ")\n", + "axes[2].barh(\n", + " ind + width + width,\n", + " fi[f\"fttransformer({semi}ml) values\"],\n", + " width,\n", + " xerr=fi[f\"fttransformer({semi}ml) std\"],\n", + " label=f\"{semi}Transformer\",\n", + ")\n", "# axes[2].barh(ind+width + width + width, fi[\"fttransformer(semi-ml) values\"], width, xerr=fi[\"fttransformer(semi-ml) std\"], label=\"Transformer (Pre-Train)\")\n", - "axes[2].axvline(0, color='black', linestyle='--', linewidth=0.5)\n", + "axes[2].axvline(0, color=\"black\", linestyle=\"--\", linewidth=0.5)\n", "axes[2].set_xlim([-0.15, 0.15])\n", "\n", "\n", "# set y-labels\n", - "labels = ['Price Lead All (Group)', 'Price Lag All (Group)', 'Price Lead Ex (Group)', 'Price Lag Ex (Group)', 'Quotes NBBO (Group)', 'Quotes Ex (Group)', 'Trade Price', \"Quotes Size (Group)\", 'Trade Size', 'Strike Price', 'Time To Maturity', 'Option Type', 'Root', 'Moneyness', \"Day Volume\", 'Issue Type']\n", - "axes[0].set(yticks=ind 
+ width, yticklabels=labels, ylim=[2*width - 1, len(fi)])\n", + "labels = [\n", + " \"Price Lead All (Group)\",\n", + " \"Price Lag All (Group)\",\n", + " \"Price Lead Ex (Group)\",\n", + " \"Price Lag Ex (Group)\",\n", + " \"Quotes NBBO (Group)\",\n", + " \"Quotes Ex (Group)\",\n", + " \"Trade Price\",\n", + " \"Quotes Size (Group)\",\n", + " \"Trade Size\",\n", + " \"Strike Price\",\n", + " \"Time To Maturity\",\n", + " \"Option Type\",\n", + " \"Root\",\n", + " \"Moneyness\",\n", + " \"Day Volume\",\n", + " \"Issue Type\",\n", + "]\n", + "axes[0].set(yticks=ind + width, yticklabels=labels, ylim=[2 * width - 1, len(fi)])\n", "\n", "# set x-labels\n", "axes[0].set_xlabel(r\"SAGE Value\")\n", @@ -2098,8 +2605,15 @@ "axes[2].set_title(\"FS Option\")\n", "\n", "handles, labels = axes[0].get_legend_handles_labels()\n", - "labels = [l.replace(\"semi-\",\"(Semi) \") for l in labels]\n", - "fig.legend(handles, labels, loc = \"lower center\", frameon=False, bbox_to_anchor=(0.5, -0.05), ncols=3)\n", + "labels = [l.replace(\"semi-\", \"(Semi) \") for l in labels]\n", + "fig.legend(\n", + " handles,\n", + " labels,\n", + " loc=\"lower center\",\n", + " frameon=False,\n", + " bbox_to_anchor=(0.5, -0.05),\n", + " ncols=3,\n", + ")\n", "\n", "plt.tight_layout()\n", "\n", diff --git a/notebooks/6.0f-mb-viz-gradient-boosting.ipynb b/notebooks/6.0f-mb-viz-gradient-boosting.ipynb index 6c7e5414..0bfc039c 100644 --- a/notebooks/6.0f-mb-viz-gradient-boosting.ipynb +++ b/notebooks/6.0f-mb-viz-gradient-boosting.ipynb @@ -14,7 +14,6 @@ "import sys\n", "from pathlib import Path\n", "\n", - "\n", "import numpy as np\n", "import pandas as pd\n", "import wandb\n", @@ -25,7 +24,7 @@ "from otc.features.build_features import (\n", " features_categorical,\n", " features_classical,\n", - ")\n" + ")" ] }, { @@ -52,7 +51,7 @@ "source": [ "# key used for files and artefacts\n", "key = f\"{EXCHANGE}_gbm_{STRATEGY}_{SUBSET}_viz\"\n", - "dataset = f\"fbv/thesis/{EXCHANGE}_{STRATEGY}_log_standardized_clipped:latest\"\n" + "dataset = f\"fbv/thesis/{EXCHANGE}_{STRATEGY}_log_standardized_clipped:latest\"" ] }, { @@ -65,7 +64,7 @@ "outputs": [], "source": [ "# set project name. 
Required to access files and artefacts\n", - "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"\n" + "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"" ] }, { @@ -86,7 +85,7 @@ "run = wandb.init(project=\"thesis\", entity=\"fbv\")\n", "\n", "artifact = run.use_artifact(dataset)\n", - "data_dir = artifact.download()\n" + "data_dir = artifact.download()" ] }, { @@ -98,28 +97,29 @@ }, "outputs": [], "source": [ - "\n", "train = pd.read_parquet(Path(data_dir, \"train_set.parquet\"), engine=\"fastparquet\")\n", "y_train = train[\"buy_sell\"]\n", "X_train = train.drop(columns=\"buy_sell\")\n", "timestamp_train = np.linspace(0, 1, len(y_train))\n", "weights_exp_train = np.geomspace(0.001, 1, num=len(y_train))\n", "\n", - "cat_features_sub = [tup[0] for tup in features_categorical if tup[0] in features_classical]\n", + "cat_features_sub = [\n", + " tup[0] for tup in features_categorical if tup[0] in features_classical\n", + "]\n", "\n", "train_pool_uni = Pool(\n", - " data=X_train.loc[:, features_classical],\n", - " label=y_train,\n", - " cat_features=cat_features_sub,\n", - " timestamp=timestamp_train,\n", + " data=X_train.loc[:, features_classical],\n", + " label=y_train,\n", + " cat_features=cat_features_sub,\n", + " timestamp=timestamp_train,\n", ")\n", "\n", "train_pool_exp = Pool(\n", - " data=X_train.loc[:, features_classical],\n", - " label=y_train,\n", - " cat_features=cat_features_sub,\n", - " timestamp=timestamp_train,\n", - " weight=weights_exp_train,\n", + " data=X_train.loc[:, features_classical],\n", + " label=y_train,\n", + " cat_features=cat_features_sub,\n", + " timestamp=timestamp_train,\n", + " weight=weights_exp_train,\n", ")\n", "\n", "val = pd.read_parquet(Path(data_dir, \"val_set.parquet\"), engine=\"fastparquet\")\n", @@ -128,11 +128,11 @@ "timestamp_val = np.linspace(0, 1, len(y_val))\n", "\n", "val_pool_uni = Pool(\n", - " data=X_val.loc[:, features_classical],\n", - " label=y_val,\n", - " cat_features=cat_features_sub,\n", - " timestamp=timestamp_val,\n", - ") " + " data=X_val.loc[:, features_classical],\n", + " label=y_val,\n", + " cat_features=cat_features_sub,\n", + " timestamp=timestamp_val,\n", + ")" ] }, { @@ -154,23 +154,30 @@ " \"logging_level\": \"Silent\",\n", " \"task_type\": \"GPU\",\n", " \"random_seed\": 42,\n", - " \"eval_metric\": \"Accuracy\"\n", + " \"eval_metric\": \"Accuracy\",\n", "}\n", "\n", - "settings = [{\"iterations\": 5}, {\"iterations\": 100}, {\"iterations\": 1000}, {\"iterations\": 2000}]\n", - "[setting.update(kwargs_shared) for setting in settings] \n", - " \n", + "settings = [\n", + " {\"iterations\": 5},\n", + " {\"iterations\": 100},\n", + " {\"iterations\": 1000},\n", + " {\"iterations\": 2000},\n", + "]\n", + "[setting.update(kwargs_shared) for setting in settings]\n", + "\n", "results = []\n", "\n", "for setting in tqdm(settings):\n", " clf = CatBoostClassifier(**setting)\n", " clf.fit(train_pool_uni, eval_set=val_pool_uni)\n", - " \n", + "\n", " proba_predictions = clf.predict_proba(val_pool_uni)\n", " positive_class_prob = proba_predictions[:, 1]\n", " y_val_mapped = (y_val + 1) // 2\n", - " \n", - " result = -np.log(positive_class_prob) * y_val_mapped - np.log(1 - positive_class_prob) * (1 - y_val_mapped)\n", + "\n", + " result = -np.log(positive_class_prob) * y_val_mapped - np.log(\n", + " 1 - positive_class_prob\n", + " ) * (1 - y_val_mapped)\n", " results.append(result)" ] }, @@ -182,13 +189,11 @@ }, "outputs": [], "source": [ - "dfs = pd.concat(results, axis=1, keys = [\"iter_5\", \"iter_100\", 
\"iter_1000\", \"iter_2000\"])\n", + "dfs = pd.concat(results, axis=1, keys=[\"iter_5\", \"iter_100\", \"iter_1000\", \"iter_2000\"])\n", "key = f\"{EXCHANGE}_gbm_{STRATEGY}_{SUBSET}_viz_dist_loss\"\n", "\n", - "output_path = (\n", - " f\"gs://thesis-bucket-option-trade-classification/data/results/{key}-viz-dist-loss.parquet\"\n", - ")\n", - "dfs.columns = ['_'.join(col).rstrip('_') for col in dfs.columns.values]\n", + "output_path = f\"gs://thesis-bucket-option-trade-classification/data/results/{key}-viz-dist-loss.parquet\"\n", + "dfs.columns = [\"_\".join(col).rstrip(\"_\") for col in dfs.columns.values]\n", "dfs.to_parquet(output_path)\n", "\n", "# Log the artifact to save it as an output of this run\n", @@ -196,7 +201,7 @@ "result_set.add_reference(output_path, name=\"results\")\n", "run.log_artifact(result_set)\n", "\n", - "wandb.finish()\n" + "wandb.finish()" ] }, { @@ -240,11 +245,32 @@ "\n", "\n", "# complete config\n", - "settings = [{}, kwargs_depth, kwargs_earl_stopping, kwargs_border_count, kwargs_growth_strategy, {}]\n", - "[setting.update(kwargs_shared) for setting in settings] \n", + "settings = [\n", + " {},\n", + " kwargs_depth,\n", + " kwargs_earl_stopping,\n", + " kwargs_border_count,\n", + " kwargs_growth_strategy,\n", + " {},\n", + "]\n", + "[setting.update(kwargs_shared) for setting in settings]\n", "# set pools\n", - "pools = [train_pool_uni, train_pool_uni, train_pool_uni, train_pool_uni, train_pool_uni, train_pool_exp]\n", - "identifier = [\"default\", \"depth\", \"early_stopping\", \"border_count\", \"grow_policy\", \"exp_weighting\"]" + "pools = [\n", + " train_pool_uni,\n", + " train_pool_uni,\n", + " train_pool_uni,\n", + " train_pool_uni,\n", + " train_pool_uni,\n", + " train_pool_exp,\n", + "]\n", + "identifier = [\n", + " \"default\",\n", + " \"depth\",\n", + " \"early_stopping\",\n", + " \"border_count\",\n", + " \"grow_policy\",\n", + " \"exp_weighting\",\n", + "]" ] }, { @@ -281,14 +307,21 @@ "\n", "for result in results:\n", " key = list(result.keys())[0]\n", - " \n", + "\n", " learn_acc = result[key][\"learn\"][\"Accuracy\"]\n", " learn_log = result[key][\"learn\"][\"Logloss\"]\n", " val_acc = result[key][\"validation\"][\"Accuracy\"]\n", " val_log = result[key][\"validation\"][\"Logloss\"]\n", - " \n", - " df = pd.DataFrame({\"learn_acc\" :learn_acc, \"learn_log\":learn_log, \"val_acc\": val_acc, \"val_log\": val_log})\n", - " df.name=key\n", + "\n", + " df = pd.DataFrame(\n", + " {\n", + " \"learn_acc\": learn_acc,\n", + " \"learn_log\": learn_log,\n", + " \"val_acc\": val_acc,\n", + " \"val_log\": val_log,\n", + " }\n", + " )\n", + " df.name = key\n", " dfs.append(df)" ] }, @@ -300,12 +333,10 @@ }, "outputs": [], "source": [ - "dfs = pd.concat(dfs, axis=1, keys = identifier)\n", + "dfs = pd.concat(dfs, axis=1, keys=identifier)\n", "\n", - "output_path = (\n", - " f\"gs://thesis-bucket-option-trade-classification/data/results/{key}-viz-losses.parquet\"\n", - ")\n", - "dfs.columns = ['_'.join(col).rstrip('_') for col in dfs.columns.values]\n", + "output_path = f\"gs://thesis-bucket-option-trade-classification/data/results/{key}-viz-losses.parquet\"\n", + "dfs.columns = [\"_\".join(col).rstrip(\"_\") for col in dfs.columns.values]\n", "dfs.to_parquet(output_path)\n", "\n", "# Log the artifact to save it as an output of this run\n", @@ -313,7 +344,7 @@ "result_set.add_reference(output_path, name=\"results\")\n", "run.log_artifact(result_set)\n", "\n", - "wandb.finish()\n" + "wandb.finish()" ] } ], diff --git a/notebooks/6.0g-mb-viz-fttransformer.ipynb 
b/notebooks/6.0g-mb-viz-fttransformer.ipynb index 36a99e9a..2cca37bc 100644 --- a/notebooks/6.0g-mb-viz-fttransformer.ipynb +++ b/notebooks/6.0g-mb-viz-fttransformer.ipynb @@ -16,22 +16,20 @@ "\n", "import numpy as np\n", "import pandas as pd\n", - "\n", - "import wandb\n", "import torch\n", - "from torch import optim, nn\n", + "import wandb\n", + "from torch import nn, optim\n", "from tqdm.auto import tqdm\n", "\n", "sys.path.append(\"..\")\n", + "from otc.data.dataloader import TabDataLoader\n", + "from otc.data.dataset import TabDataset\n", "from otc.features.build_features import (\n", " features_classical,\n", ")\n", + "from otc.models.activation import GeGLU, ReGLU\n", "from otc.models.fttransformer import FeatureTokenizer, FTTransformer, Transformer\n", - "from otc.models.activation import ReGLU, GeGLU\n", - "from otc.data.dataset import TabDataset\n", - "from otc.data.dataloader import TabDataLoader\n", - "from otc.features.build_features import features_classical\n", - "from otc.optim.scheduler import CosineWarmupScheduler\n" + "from otc.optim.scheduler import CosineWarmupScheduler" ] }, { @@ -43,7 +41,7 @@ "# set globally here\n", "EXCHANGE = \"ise\" # \"cboe\"\n", "STRATEGY = \"supervised\" # \"transfer\"\n", - "SUBSET = \"test\" # \"all\"\n" + "SUBSET = \"test\" # \"all\"" ] }, { @@ -54,7 +52,7 @@ "source": [ "# key used for files and artefacts\n", "key = f\"{EXCHANGE}_fttransformer_{STRATEGY}_{SUBSET}_viz\"\n", - "dataset = f\"fbv/thesis/{EXCHANGE}_{STRATEGY}_log_standardized_clipped:latest\"\n" + "dataset = f\"fbv/thesis/{EXCHANGE}_{STRATEGY}_log_standardized_clipped:latest\"" ] }, { @@ -66,7 +64,7 @@ "outputs": [], "source": [ "# set project name. Required to access files and artefacts\n", - "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"\n" + "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"" ] }, { @@ -91,7 +89,7 @@ "val = pd.read_parquet(Path(data_dir, \"val_set.parquet\"), engine=\"fastparquet\")\n", "y_val = val[\"buy_sell\"]\n", "X_val = val.drop(columns=\"buy_sell\")\n", - "X_val = X_val.loc[:, features_classical]\n" + "X_val = X_val.loc[:, features_classical]" ] }, { @@ -141,7 +139,7 @@ " \"sample_weighting\",\n", " \"label_smoothing\",\n", " \"lr_scheduler\",\n", - "]\n" + "]" ] }, { @@ -180,7 +178,6 @@ "results = []\n", "\n", "for i, setting in enumerate(tqdm(settings)):\n", - "\n", " result = []\n", "\n", " transformer_kwargs = {\n", @@ -238,7 +235,7 @@ " training_data.x_cont,\n", " training_data.weight,\n", " training_data.y,\n", - " **dl_params\n", + " **dl_params,\n", " )\n", "\n", " val_loader = TabDataLoader(\n", @@ -314,7 +311,6 @@ "results = []\n", "\n", "for i, setting in enumerate(tqdm(settings)):\n", - "\n", " result = []\n", "\n", " transformer_kwargs = {\n", @@ -372,7 +368,7 @@ " training_data.x_cont,\n", " training_data.weight,\n", " training_data.y,\n", - " **dl_params\n", + " **dl_params,\n", " )\n", "\n", " val_loader = TabDataLoader(\n", @@ -421,13 +417,11 @@ " val_step = 0\n", "\n", " for epoch in range(epochs):\n", - "\n", " train_batch = 0\n", "\n", " results_epoch = []\n", "\n", " for x_cat, x_cont, weights, targets in train_loader:\n", - "\n", " clf.train()\n", " optimizer.zero_grad()\n", " with torch.autocast(device_type=\"cuda\", dtype=torch.float16):\n", @@ -471,7 +465,6 @@ "\n", " with torch.no_grad():\n", " for x_cat, x_cont, weights, targets in val_loader:\n", - "\n", " # for my implementation\n", " logits = clf(x_cat, x_cont).flatten()\n", " logits = logits.flatten()\n", @@ -507,7 +500,7 @@ " gc.collect()\n", " 
torch.cuda.empty_cache()\n", "\n", - " results.append({identifier[i]: result})\n" + " results.append({identifier[i]: result})" ] }, { @@ -522,7 +515,7 @@ " key = list(result.keys())[0]\n", " df = pd.DataFrame(result[key])\n", " df.name = key\n", - " dfs.append(df)\n" + " dfs.append(df)" ] }, { @@ -545,7 +538,7 @@ "result_set.add_reference(output_path, name=\"results\")\n", "run.log_artifact(result_set)\n", "\n", - "wandb.finish()\n" + "wandb.finish()" ] }, { @@ -555,7 +548,7 @@ "outputs": [], "source": [ "filter_col = [col for col in dfs if col.endswith(\"val_loss\")]\n", - "dfs[filter_col].dropna().reset_index(drop=True).plot()\n" + "dfs[filter_col].dropna().reset_index(drop=True).plot()" ] } ], diff --git a/notebooks/6.0h-mb-viz-embeddings.ipynb b/notebooks/6.0h-mb-viz-embeddings.ipynb index f6028b3a..c9bb7cc8 100644 --- a/notebooks/6.0h-mb-viz-embeddings.ipynb +++ b/notebooks/6.0h-mb-viz-embeddings.ipynb @@ -8,23 +8,20 @@ }, "outputs": [], "source": [ - "import gcsfs\n", - "import google.auth\n", - "\n", - "\n", "import json\n", "import os\n", "import pickle\n", "from pathlib import Path\n", "\n", - "from adjustText import adjust_text\n", - "\n", + "import gcsfs\n", + "import google.auth\n", + "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "from matplotlib import rc\n", "import torch\n", - "import wandb" + "import wandb\n", + "from adjustText import adjust_text\n", + "from matplotlib import rc" ] }, { @@ -48,7 +45,10 @@ }, "outputs": [], "source": [ - "fs.get(\"gs://thesis-bucket-option-trade-classification/data/raw/matched_samples_ise_quotes_extended.csv\", \"ise_matched.csv\")" + "fs.get(\n", + " \"gs://thesis-bucket-option-trade-classification/data/raw/matched_samples_ise_quotes_extended.csv\",\n", + " \"ise_matched.csv\",\n", + ")" ] }, { @@ -57,8 +57,8 @@ "metadata": {}, "outputs": [], "source": [ - "sec_id_root = pd.read_csv(\"ise_matched.csv\",usecols=[\"ROOT\", \"secid_OM\"])\n", - "sec_id_root = sec_id_root.drop_duplicates(keep=\"last\",subset=\"ROOT\")" + "sec_id_root = pd.read_csv(\"ise_matched.csv\", usecols=[\"ROOT\", \"secid_OM\"])\n", + "sec_id_root = sec_id_root.drop_duplicates(keep=\"last\", subset=\"ROOT\")" ] }, { @@ -69,8 +69,10 @@ }, "outputs": [], "source": [ - "security_names = pd.read_csv('../data/security_name.csv')\n", - "security_names = security_names[[\"secid\", \"issuer\"]].drop_duplicates(subset=\"secid\", keep=\"last\")" + "security_names = pd.read_csv(\"../data/security_name.csv\")\n", + "security_names = security_names[[\"secid\", \"issuer\"]].drop_duplicates(\n", + " subset=\"secid\", keep=\"last\"\n", + ")" ] }, { @@ -92,7 +94,9 @@ }, "outputs": [], "source": [ - "label = pd.read_csv('../models/metadata.tsv', sep='\\t', header=None).rename({0:\"label\"},axis=1)" + "label = pd.read_csv(\"../models/metadata.tsv\", sep=\"\\t\", header=None).rename(\n", + " {0: \"label\"}, axis=1\n", + ")" ] }, { @@ -116,7 +120,9 @@ "source": [ "label_commented = label_merged[\"label\"]\n", "\n", - "commented_label = label_merged[\"label\"].astype(str) + \" (\" + label_merged[\"issuer\"].astype(str) + \")\"\n", + "commented_label = (\n", + " label_merged[\"label\"].astype(str) + \" (\" + label_merged[\"issuer\"].astype(str) + \")\"\n", + ")\n", "# skip issue type and option type\n", "label_commented.iloc[8:] = commented_label.iloc[8:]" ] @@ -129,7 +135,7 @@ }, "outputs": [], "source": [ - "label_commented.to_csv('../models/metadata_clearlabels.tsv',sep=\"\\t\")" + 
"label_commented.to_csv(\"../models/metadata_clearlabels.tsv\", sep=\"\\t\")" ] }, { @@ -150,7 +156,7 @@ "plt.rcParams.update(params)\n", "rc(\"text\", usetex=True)\n", "\n", - "plt.rc('text.latex', preamble=r'\\usepackage{amsmath}\\usepackage[utf8]{inputenc}')\n", + "plt.rc(\"text.latex\", preamble=r\"\\usepackage{amsmath}\\usepackage[utf8]{inputenc}\")\n", "\n", "CM = 1 / 2.54" ] @@ -193,8 +199,8 @@ "\n", "artifact = run.use_artifact(model)\n", "model_dir = artifact.download()\n", - " \n", - "with open(Path(model_dir, model_name), 'rb') as f:\n", + "\n", + "with open(Path(model_dir, model_name), \"rb\") as f:\n", " model = pickle.load(f)\n", "\n", "embeddings = model.clf.feature_tokenizer.cat_tokenizer.embeddings.weight.to(\"cpu\")" @@ -219,9 +225,9 @@ "source": [ "# as done https://github.com/pytorch/pytorch/issues/51445\n", "f = open(\"tensors.tsv\", mode=\"a\")\n", - "for x in embeddings: \n", - " x = [str(i.item()) for i in x] \n", - " f.write('\\t'.join(x) + '\\n')\n", + "for x in embeddings:\n", + " x = [str(i.item()) for i in x]\n", + " f.write(\"\\t\".join(x) + \"\\n\")\n", "f.close()" ] }, @@ -242,7 +248,7 @@ "outputs": [], "source": [ "# generate t-sne projection using save to bookmark feature https://projector.tensorflow.org/\n", - "with open('../models/state.txt') as f:\n", + "with open(\"../models/state.txt\") as f:\n", " d = json.load(f)" ] }, @@ -252,9 +258,11 @@ "metadata": {}, "outputs": [], "source": [ - "tsne_projections = pd.DataFrame(d[0]['projections'])\n", + "tsne_projections = pd.DataFrame(d[0][\"projections\"])\n", "# get labels from scalers\n", - "label = pd.read_csv('../models/metadata.tsv', sep='\\t', header=None).rename({0:\"label\"},axis=1)" + "label = pd.read_csv(\"../models/metadata.tsv\", sep=\"\\t\", header=None).rename(\n", + " {0: \"label\"}, axis=1\n", + ")" ] }, { @@ -266,9 +274,7 @@ "outputs": [], "source": [ "def cos_dist_norm(matrix_of_vectors: torch.Tensor):\n", - " \"\"\"\n", - " Compute the cosine distance ([0, 2]) between two vectors that have been normalized to unit norm.\n", - " \"\"\"\n", + " \"\"\"Compute the cosine distance ([0, 2]) between two vectors that have been normalized to unit norm.\"\"\"\n", " return 1 - matrix_of_vectors @ matrix_of_vectors.T" ] }, @@ -281,8 +287,7 @@ "outputs": [], "source": [ "def cos_sim(matrix_of_vectors: torch.Tensor):\n", - " \"\"\"\n", - " Computes cosine similarities for between all vectors, extremely useful for comparing\n", + " \"\"\"Computes cosine similarities for between all vectors, extremely useful for comparing\n", " similarities between embeddings when doing deep embedding learning.\n", "\n", " Adapted from: https://github.com/dalisson/pairwise_cosine_distance_pytorch/blob/master/pairwise_cosine_similarity.py\n", @@ -290,7 +295,7 @@ " and:\n", " https://github.com/tensorflow/tensorboard/blob/00eeb7adcbf341ec25b49c37abee1cfe395ea1f9/tensorboard/plugins/projector/vz_projector/vz-projector-inspector-panel.ts#L398\n", " https://github.com/tensorflow/tensorboard/blob/00eeb7adcbf341ec25b49c37abee1cfe395ea1f9/tensorboard/plugins/projector/vz_projector/vector.ts#L64\n", - " \n", + "\n", " input:\n", " matrix_of_vectors: tensor with shape (n_vectors, vector_size)\n", "\n", @@ -300,11 +305,10 @@ " row[0,0] is 1 and row[0,42] is the similarity between the first\n", " element in the input and the 43th element in the input.\n", " \"\"\"\n", - "\n", " dot_product = matrix_of_vectors @ matrix_of_vectors.t()\n", " norms = torch.sqrt(torch.einsum(\"ii->i\", dot_product))\n", " similarities = dot_product / 
(norms[None] * norms[..., None])\n", - " return similarities\n" + " return similarities" ] }, { @@ -316,9 +320,7 @@ "outputs": [], "source": [ "def cos_dist(matrix_of_vectors: torch.Tensor):\n", - " \"\"\"\n", - " Compute the cosine distance ([0, 2]) between two vectors.\n", - " \"\"\"\n", + " \"\"\"Compute the cosine distance ([0, 2]) between two vectors.\"\"\"\n", " return 1 - cos_sim(matrix_of_vectors)" ] }, @@ -422,7 +424,6 @@ "texts = []\n", "\n", "for i, cond in enumerate(mask):\n", - "\n", " if cond:\n", " l = label[\"label\"].iloc[i]\n", " factor = 1.5 if l == key else 1\n", @@ -449,7 +450,7 @@ "\n", "fig.tight_layout()\n", "\n", - "plt.savefig(f\"../reports/Graphs/categorical_embeddings_{key}.pdf\", bbox_inches=\"tight\")\n" + "plt.savefig(f\"../reports/Graphs/categorical_embeddings_{key}.pdf\", bbox_inches=\"tight\")" ] } ], diff --git a/notebooks/6.0i-mb-discussion.ipynb b/notebooks/6.0i-mb-discussion.ipynb index 8576cd68..04896590 100644 --- a/notebooks/6.0i-mb-discussion.ipynb +++ b/notebooks/6.0i-mb-discussion.ipynb @@ -10,16 +10,13 @@ "outputs": [], "source": [ "import os\n", - "import random\n", "import sys\n", "from pathlib import Path\n", "\n", "import numpy as np\n", "import pandas as pd\n", - "from sklearn.metrics import accuracy_score\n", "\n", "sys.path.append(\"..\")\n", - "import warnings\n", "\n", "import wandb\n", "from tqdm.auto import tqdm\n", @@ -96,14 +93,14 @@ " test = pd.read_parquet(\n", " Path(data_dir, \"test_set.parquet\"), engine=\"fastparquet\", columns=columns\n", " )\n", - " \n", + "\n", "elif strategy == \"transfer\":\n", " # load test set\n", " test = pd.read_parquet(\n", " Path(data_dir, \"test_set.parquet\"), engine=\"fastparquet\", columns=columns\n", " )\n", "\n", - " \n", + "\n", "elif strategy == \"unsupervised\":\n", " # load unlabelled training set\n", " train = pd.read_parquet(\n", @@ -120,19 +117,26 @@ "outputs": [], "source": [ "def summarize_stats(df):\n", - " summary_stats = pd.DataFrame(index=df.columns) # Create an empty DataFrame with column names as index\n", + " summary_stats = pd.DataFrame(\n", + " index=df.columns\n", + " ) # Create an empty DataFrame with column names as index\n", "\n", " # Calculate summary statistics\n", " # summary_stats['Count'] = df.count()\n", " # summary_stats['Nunique'] = df.nunique()\n", "\n", " summary_stats = df.describe(percentiles=[0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99])\n", - " summary_stats = summary_stats.transpose() # Transpose the table to have columns as variables\n", + " summary_stats = (\n", + " summary_stats.transpose()\n", + " ) # Transpose the table to have columns as variables\n", "\n", " # Rename the columns\n", - " summary_stats.rename(columns={'mean': 'Mean', 'std': 'SD', '50%': 'Median'},\n", - " inplace=True)\n", - " return summary_stats[[\"Mean\", \"SD\", \"1%\", \"5%\", \"25%\", \"Median\", \"75%\",\"95%\", \"99%\"]]" + " summary_stats.rename(\n", + " columns={\"mean\": \"Mean\", \"std\": \"SD\", \"50%\": \"Median\"}, inplace=True\n", + " )\n", + " return summary_stats[\n", + " [\"Mean\", \"SD\", \"1%\", \"5%\", \"25%\", \"Median\", \"75%\", \"95%\", \"99%\"]\n", + " ]" ] }, { @@ -196,10 +200,10 @@ "outputs": [], "source": [ "# set here globally\n", - "EXCHANGE = \"ise\" # \"ise\"\n", - "MODELS = [\"classical\"] # \"classical\", \"fttransformer\", \"gbm\"\n", + "EXCHANGE = \"ise\" # \"ise\"\n", + "MODELS = [\"classical\"] # \"classical\", \"fttransformer\", \"gbm\"\n", "SUBSET = \"all\" # \"all\"\n", - "STRATEGY = \"supervised\" # \"supervised\" \n", + "STRATEGY = 
\"supervised\" # \"supervised\"\n", "\n", "RETRAIN = False" ] @@ -220,7 +224,7 @@ "run = wandb.init(project=\"thesis\", entity=\"fbv\")\n", "\n", "# load unscaled data\n", - "artifact = run.use_artifact(DATASET) \n", + "artifact = run.use_artifact(DATASET)\n", "data_dir = artifact.download()\n", "\n", "# load results\n", @@ -231,9 +235,9 @@ " results = f\"fbv/thesis/{EXCHANGE}_{model}_{STRATEGY}_{SUBSET}_retrain:latest\"\n", " else:\n", " results = f\"fbv/thesis/{EXCHANGE}_{model}_{STRATEGY}_{SUBSET}:latest\"\n", - " artifact = run.use_artifact(results) \n", + " artifact = run.use_artifact(results)\n", " result_dir = artifact.download()\n", - " result_dirs.append(result_dir)\n" + " result_dirs.append(result_dir)" ] }, { @@ -297,7 +301,7 @@ "\n", "X_print = eval_data\n", "\n", - "del results\n" + "del results" ] }, { @@ -440,14 +444,16 @@ }, "outputs": [], "source": [ - "pivot_table = pd.pivot_table(X_print, \n", - " values='values',\n", - " columns='prox_q_binned',\n", - " index='TRADE_SIZE_binned',\n", - " aggfunc=sum,\n", - " fill_value=0,\n", - " margins=True)\n", - "pivot_table.div(pivot_table.iloc[:,-1], axis=0 )" + "pivot_table = pd.pivot_table(\n", + " X_print,\n", + " values=\"values\",\n", + " columns=\"prox_q_binned\",\n", + " index=\"TRADE_SIZE_binned\",\n", + " aggfunc=sum,\n", + " fill_value=0,\n", + " margins=True,\n", + ")\n", + "pivot_table.div(pivot_table.iloc[:, -1], axis=0)" ] }, { @@ -465,14 +471,16 @@ }, "outputs": [], "source": [ - "pivot_table = pd.pivot_table(X_print, \n", - " values='values',\n", - " columns='myn_binned',\n", - " index='TRADE_SIZE_binned',\n", - " aggfunc=sum,\n", - " fill_value=0,\n", - " margins=True)\n", - "pivot_table.div(pivot_table.iloc[:,-1], axis=0 )" + "pivot_table = pd.pivot_table(\n", + " X_print,\n", + " values=\"values\",\n", + " columns=\"myn_binned\",\n", + " index=\"TRADE_SIZE_binned\",\n", + " aggfunc=sum,\n", + " fill_value=0,\n", + " margins=True,\n", + ")\n", + "pivot_table.div(pivot_table.iloc[:, -1], axis=0)" ] }, { @@ -491,14 +499,16 @@ "outputs": [], "source": [ "# savickas: trades with longer maturity tend to be smaller\n", - "pivot_table = pd.pivot_table(X_print, \n", - " values='values',\n", - " index='ttm_binned',\n", - " columns='TRADE_SIZE_binned',\n", - " aggfunc=sum,\n", - " fill_value=0,\n", - " margins=True)\n", - "pivot_table.div(pivot_table.iloc[:,-1], axis=0 )" + "pivot_table = pd.pivot_table(\n", + " X_print,\n", + " values=\"values\",\n", + " index=\"ttm_binned\",\n", + " columns=\"TRADE_SIZE_binned\",\n", + " aggfunc=sum,\n", + " fill_value=0,\n", + " margins=True,\n", + ")\n", + "pivot_table.div(pivot_table.iloc[:, -1], axis=0)" ] }, { @@ -516,14 +526,16 @@ }, "outputs": [], "source": [ - "pivot_table = pd.pivot_table(X_print, \n", - " values='values',\n", - " index='issue_type',\n", - " columns=None,\n", - " aggfunc=sum,\n", - " fill_value=0,\n", - " margins=True)\n", - "pivot_table.div(pivot_table.iloc[-1], axis=1)\n" + "pivot_table = pd.pivot_table(\n", + " X_print,\n", + " values=\"values\",\n", + " index=\"issue_type\",\n", + " columns=None,\n", + " aggfunc=sum,\n", + " fill_value=0,\n", + " margins=True,\n", + ")\n", + "pivot_table.div(pivot_table.iloc[-1], axis=1)" ] }, { @@ -541,13 +553,15 @@ }, "outputs": [], "source": [ - "pivot_table = pd.pivot_table(X_print, \n", - " values='values',\n", - " index='prox_q_binned',\n", - " columns=None,\n", - " aggfunc=sum,\n", - " fill_value=0,\n", - " margins=True)\n", + "pivot_table = pd.pivot_table(\n", + " X_print,\n", + " values=\"values\",\n", + " 
index=\"prox_q_binned\",\n", + " columns=None,\n", + " aggfunc=sum,\n", + " fill_value=0,\n", + " margins=True,\n", + ")\n", "pivot_table.div(pivot_table.iloc[-1], axis=1)" ] }, @@ -570,7 +584,9 @@ "results = []\n", "\n", "# calculate true rel effective spread but not aggregated, convert to %\n", - "es_true = effective_spread(X_print[\"buy_sell\"], X_print[\"TRADE_PRICE\"], mid, mode=\"none\")\n", + "es_true = effective_spread(\n", + " X_print[\"buy_sell\"], X_print[\"TRADE_PRICE\"], mid, mode=\"none\"\n", + ")\n", "nom_true = np.nanmean(es_true)\n", "\n", "eps_true = np.empty(es_true.shape)\n", @@ -579,26 +595,31 @@ "\n", "\n", "for classifier in tqdm(classifiers):\n", - "\n", " # calculate pred rel effective spread but not aggregated convert to %\n", - " es_pred = effective_spread(X_print[classifier], X_print[\"TRADE_PRICE\"], mid, mode=\"none\")\n", - " \n", + " es_pred = effective_spread(\n", + " X_print[classifier], X_print[\"TRADE_PRICE\"], mid, mode=\"none\"\n", + " )\n", + "\n", " eps_pred = np.empty(es_pred.shape)\n", " np.divide(es_pred, mid, out=eps_pred, where=mid != 0)\n", "\n", - " wilcoxon_res = wilcoxon(eps_pred, eps_true, nan_policy=\"omit\", zero_method=\"zsplit\")\n", + " wilcoxon_res = wilcoxon(eps_pred, eps_true, nan_policy=\"omit\", zero_method=\"zsplit\")\n", "\n", " res = pd.Series(\n", - " {\n", - " \"nom_pred\": np.nanmean(es_pred),\n", - " \"rel_pred\": np.nanmean(eps_pred),\n", - " \"statistic\":wilcoxon_res.statistic,\n", - " \"pvalue\":wilcoxon_res.pvalue,\n", - " }, name=classifier\n", - " )\n", + " {\n", + " \"nom_pred\": np.nanmean(es_pred),\n", + " \"rel_pred\": np.nanmean(eps_pred),\n", + " \"statistic\": wilcoxon_res.statistic,\n", + " \"pvalue\": wilcoxon_res.pvalue,\n", + " },\n", + " name=classifier,\n", + " )\n", " results.append(res)\n", "\n", - "true_eff = pd.Series({\"nom_pred\":nom_true, \"rel_pred\": rel_true, \"statistic\":np.NaN, \"pvalue\":np.NaN}, name=\"true_eff\")\n", + "true_eff = pd.Series(\n", + " {\"nom_pred\": nom_true, \"rel_pred\": rel_true, \"statistic\": np.nan, \"pvalue\": np.nan},\n", + " name=\"true_eff\",\n", + ")\n", "\n", "results.append(true_eff)\n", "\n", @@ -613,7 +634,7 @@ }, "outputs": [], "source": [ - "results.T.style.format(\"{:.3f}\")\n" + "results.T.style.format(\"{:.3f}\")" ] }, { @@ -631,7 +652,7 @@ " label=f\"tab:eff-{KEY}\",\n", " caption=(f\"long-eff-{KEY}\", f\"short-eff-{KEY}\"),\n", " convert_css=True,\n", - ")\n" + ")" ] } ], diff --git a/src/otc/models/fttransformer.py b/src/otc/models/fttransformer.py index 627e5cca..590a6791 100644 --- a/src/otc/models/fttransformer.py +++ b/src/otc/models/fttransformer.py @@ -34,10 +34,8 @@ def _is_glu_activation(activation: Callable[..., nn.Module]) -> bool: bool: truth value. """ return ( - isinstance(activation, str) - and activation.endswith("GLU") - or activation in [ReGLU, GeGLU] - ) + isinstance(activation, str) and activation.endswith("GLU") + ) or activation in [ReGLU, GeGLU] def _all_or_none(values: list[Any]) -> bool: