KarelZe · pre-commit-ci · Nov 25, 2024 · Nov 25, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -13,15 +13,15 @@ ci:
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
   - repo: https://github.com/kynan/nbstripout
-    rev: 0.7.1
+    rev: 0.8.1
     hooks:
       - id: nbstripout
   # - repo: https://github.com/cmhughes/latexindent.pl.git
   #   rev: V3.19.1
   #   hooks:
   #     - id: latexindent
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       - id: trailing-whitespace
       - id: check-added-large-files
@@ -43,7 +43,7 @@ repos:
   #       # Similar to: https://stackoverflow.com/a/73603491/5755604
   #       additional_dependencies: ['types-PyYAML']
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.4.9
+    rev: v0.8.0
     hooks:
     - id: ruff
       args:

diff --git a/notebooks/1.0-mb-data-preprocessing-mem-reduce.ipynb b/notebooks/1.0-mb-data-preprocessing-mem-reduce.ipynb
@@ -31,7 +31,7 @@
     "ProgressBar.enable()\n",
     "\n",
     "import wandb\n",
-    "from tqdm.auto import tqdm\n"
+    "from tqdm.auto import tqdm"
    ]
   },
   {
@@ -47,7 +47,7 @@
     "FILE_PATH_INPUT = (\n",
     "    \"gs://thesis-bucket-option-trade-classification/data/raw/matched_cboe_quotes.csv\"\n",
     ")\n",
-    "FILE_PATH_OUTPUT = \"gs://thesis-bucket-option-trade-classification/data/preprocessed/\"\n"
+    "FILE_PATH_OUTPUT = \"gs://thesis-bucket-option-trade-classification/data/preprocessed/\""
    ]
   },
   {
@@ -58,7 +58,7 @@
    "source": [
     "os.environ[\"GCLOUD_PROJECT\"] = \"flowing-mantis-239216\"\n",
     "credentials, _ = google.auth.default()\n",
-    "fs = gcsfs.GCSFileSystem(project=\"thesis\", token=credentials)\n"
+    "fs = gcsfs.GCSFileSystem(project=\"thesis\", token=credentials)"
    ]
   },
   {
@@ -76,7 +76,7 @@
    "source": [
     "# connect to weights and biases\n",
     "run = wandb.init(project=\"thesis\", job_type=\"dataset-creation\", entity=\"fbv\")\n",
-    "dataset = wandb.Artifact(name=f\"{EXCHANGE}_{STRATEGY}_csv\", type=\"raw_data\")\n"
+    "dataset = wandb.Artifact(name=f\"{EXCHANGE}_{STRATEGY}_csv\", type=\"raw_data\")"
    ]
   },
   {
@@ -88,8 +88,7 @@
    "outputs": [],
    "source": [
     "def import_data(input_file: str) -> pd.DataFrame:\n",
-    "    \"\"\"\n",
-    "    create a dataframe and optimize its memory usage.\n",
+    "    \"\"\"Create a dataframe and optimize its memory usage.\n",
     "\n",
     "    I.e., apply some optimizations i.e, manual inference of dtypes, pre-selection\n",
     "    of unique columns and chunking to enable import.\n",
@@ -189,7 +188,7 @@
     "\n",
     "    format = \"%d%b%y:%H:%M:%S\"\n",
     "    df[\"QUOTE_DATETIME\"] = pd.to_datetime(df[\"QUOTE_DATETIME\"], format=format)\n",
-    "    return df\n"
+    "    return df"
    ]
   },
   {
@@ -203,8 +202,7 @@
     "def df_to_parquet(\n",
     "    x: pd.DataFrame, target_dir: str, chunk_size: int = 1000000, **parquet_wargs\n",
     ") -> None:\n",
-    "    \"\"\"\n",
-    "    Write pd.DataFrame to parquet format.\n",
+    "    \"\"\"Write pd.DataFrame to parquet format.\n",
     "\n",
     "    Args:\n",
     "        x (pd.DataFrame): input dataframe.\n",
@@ -222,7 +220,7 @@
     "        slc.to_parquet(output_path, **parquet_wargs)\n",
     "\n",
     "        # log in w & b\n",
-    "        dataset.add_reference(output_path, name=f\"raw_parquet_{chunk:04d}\")\n"
+    "        dataset.add_reference(output_path, name=f\"raw_parquet_{chunk:04d}\")"
    ]
   },
   {
@@ -805,7 +803,7 @@
     "client = Client()\n",
     "\n",
     "df = import_data(FILE_PATH_INPUT)\n",
-    "df_to_parquet(df, FILE_PATH_OUTPUT)\n"
+    "df_to_parquet(df, FILE_PATH_OUTPUT)"
    ]
   },
   {
@@ -833,7 +831,7 @@
    "source": [
     "# Log the artifact to save it as an output of this run\n",
     "run.log_artifact(dataset)\n",
-    "wandb.finish()\n"
+    "wandb.finish()"
    ]
   }
  ],