diff --git a/notebooks/format_results.ipynb b/notebooks/format_results.ipynb
new file mode 100644
index 0000000..5e87ccd
--- /dev/null
+++ b/notebooks/format_results.ipynb
@@ -0,0 +1,833 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "ecedb1c0-d2cc-44d5-aa6e-9de150509d6c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import glob\n",
+ "import pandas as pd\n",
+ "import seaborn as sns\n",
+ "import matplotlib.pyplot as plt\n",
+ "import matplotlib.image as mpimg\n",
+ "\n",
+ "sns.set_theme()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "id": "b1ba7767-0e5d-482e-a138-e3a2542c3a2f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "param = 'tn'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "c13f310a-9569-4b83-b26b-9c26ceb4a39a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Build the names of model versions v1 through v4 as '<param>_model_v<N>'.\n",
+ "version_numbers = range(1, 5)\n",
+ "versions = [f'{param}_model_v{number}' for number in version_numbers]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "id": "9fc471e2-0162-4260-92e6-f7f2b2b69865",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['tn_model_v1', 'tn_model_v2', 'tn_model_v3', 'tn_model_v4']"
+ ]
+ },
+ "execution_count": 68,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "versions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "id": "0c95583c-1243-4ba2-84ad-8939d9a96c73",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'tn_model_v1': 0.9,\n",
+ " 'tn_model_v2': 0.8,\n",
+ " 'tn_model_v3': 0.7,\n",
+ " 'tn_model_v4': 0.6}"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Pair each model version with its threshold, in the order the versions are listed.\n",
+ "thresholds = [0.9, 0.8, 0.7, 0.6]\n",
+ "threshold_dict = {name: value for name, value in zip(versions, thresholds)}\n",
+ "threshold_dict"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "id": "a1757af3-148c-41a8-a7a7-4b88ea2516f0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Root folder holding one sub-folder per model version; kept in a single\n",
+ "# constant so the location can be changed in one place.\n",
+ "MODEL_DIR = 'D:/est_water_qual/model'\n",
+ "\n",
+ "# Each version stores its results in '<version>/<version>_results.csv'.\n",
+ "files = [f'{MODEL_DIR}/{version}/{version}_results.csv' for version in versions]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "id": "6aa5b0f7-43ca-4667-ad18-ab11276348b1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['D:/est_water_qual/model/tn_model_v1/tn_model_v1_results.csv',\n",
+ " 'D:/est_water_qual/model/tn_model_v2/tn_model_v2_results.csv',\n",
+ " 'D:/est_water_qual/model/tn_model_v3/tn_model_v3_results.csv',\n",
+ " 'D:/est_water_qual/model/tn_model_v4/tn_model_v4_results.csv']"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "files"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "id": "cba86bd0-0930-41ee-83e1-837bfa0bcfc4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Seed the combined table with only the 'Attribute' key column of the first\n",
+ "# version's results file; the value columns are merged on in a later cell.\n",
+ "results = pd.read_csv(files[0], usecols=['Attribute'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "id": "83c75ff6-36ee-46a3-be94-6fc4b8e494c7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Attribute | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " n_features | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " test_size | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " n_samples_train | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " n_samples_test | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " r2_train | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " r2_test | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " oob_score | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " nse_test | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " pbias_test | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " max_depth | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " max_features | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " min_samples_leaf | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " min_samples_split | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " n_estimators | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " bootstrap | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Attribute\n",
+ "0 n_features\n",
+ "1 test_size\n",
+ "2 n_samples_train\n",
+ "3 n_samples_test\n",
+ "4 r2_train\n",
+ "5 r2_test\n",
+ "6 oob_score\n",
+ "7 nse_test\n",
+ "8 pbias_test\n",
+ "9 max_depth\n",
+ "10 max_features\n",
+ "11 min_samples_leaf\n",
+ "12 min_samples_split\n",
+ "13 n_estimators\n",
+ "14 bootstrap"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "id": "726914fd-5a37-4c69-99ec-c9b69c8cddf7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Rebuild from the key column alone so that re-running this cell does not\n",
+ "# merge the same files twice (which would add '_x'/'_y' suffixed duplicates).\n",
+ "results = results[['Attribute']]\n",
+ "\n",
+ "# Each CSV contributes its value column(s), aligned on the shared 'Attribute' key.\n",
+ "for file in files:\n",
+ "    df = pd.read_csv(file)\n",
+ "    results = results.merge(df, how='outer', on='Attribute')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "id": "a45dbcc7-4914-4a84-b803-72b87219bbd7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Attribute | \n",
+ " TN_MODEL_V1 | \n",
+ " TN_MODEL_V2 | \n",
+ " TN_MODEL_V3 | \n",
+ " TN_MODEL_V4 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " n_features | \n",
+ " 62 | \n",
+ " 55 | \n",
+ " 47 | \n",
+ " 38 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " test_size | \n",
+ " 0.3 | \n",
+ " 0.3 | \n",
+ " 0.3 | \n",
+ " 0.3 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " n_samples_train | \n",
+ " 328 | \n",
+ " 328 | \n",
+ " 328 | \n",
+ " 328 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " n_samples_test | \n",
+ " 141 | \n",
+ " 141 | \n",
+ " 141 | \n",
+ " 141 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " r2_train | \n",
+ " 0.8768110787259299 | \n",
+ " 0.9370844249679031 | \n",
+ " 0.9174176077581037 | \n",
+ " 0.9277804571934218 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " r2_test | \n",
+ " 0.7914042073068632 | \n",
+ " 0.8345873204768984 | \n",
+ " 0.8205758597605688 | \n",
+ " 0.8325944480437459 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " oob_score | \n",
+ " 0.7011273201967085 | \n",
+ " 0.7268873570430123 | \n",
+ " 0.7136227756184379 | \n",
+ " 0.7378667449603072 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " nse_test | \n",
+ " 0.7914042073068632 | \n",
+ " 0.8345873204768984 | \n",
+ " 0.8205758597605688 | \n",
+ " 0.8325944480437459 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " pbias_test | \n",
+ " -0.005326843287056666 | \n",
+ " 0.034952618333772785 | \n",
+ " 0.018924951828989685 | \n",
+ " 0.01821177573840633 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " max_depth | \n",
+ " 90 | \n",
+ " 30 | \n",
+ " 30 | \n",
+ " 60 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " max_features | \n",
+ " sqrt | \n",
+ " sqrt | \n",
+ " sqrt | \n",
+ " sqrt | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " min_samples_leaf | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " min_samples_split | \n",
+ " 5 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " n_estimators | \n",
+ " 30 | \n",
+ " 30 | \n",
+ " 30 | \n",
+ " 60 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " bootstrap | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Attribute TN_MODEL_V1 TN_MODEL_V2 \\\n",
+ "0 n_features 62 55 \n",
+ "1 test_size 0.3 0.3 \n",
+ "2 n_samples_train 328 328 \n",
+ "3 n_samples_test 141 141 \n",
+ "4 r2_train 0.8768110787259299 0.9370844249679031 \n",
+ "5 r2_test 0.7914042073068632 0.8345873204768984 \n",
+ "6 oob_score 0.7011273201967085 0.7268873570430123 \n",
+ "7 nse_test 0.7914042073068632 0.8345873204768984 \n",
+ "8 pbias_test -0.005326843287056666 0.034952618333772785 \n",
+ "9 max_depth 90 30 \n",
+ "10 max_features sqrt sqrt \n",
+ "11 min_samples_leaf 4 2 \n",
+ "12 min_samples_split 5 2 \n",
+ "13 n_estimators 30 30 \n",
+ "14 bootstrap True True \n",
+ "\n",
+ " TN_MODEL_V3 TN_MODEL_V4 \n",
+ "0 47 38 \n",
+ "1 0.3 0.3 \n",
+ "2 328 328 \n",
+ "3 141 141 \n",
+ "4 0.9174176077581037 0.9277804571934218 \n",
+ "5 0.8205758597605688 0.8325944480437459 \n",
+ "6 0.7136227756184379 0.7378667449603072 \n",
+ "7 0.8205758597605688 0.8325944480437459 \n",
+ "8 0.018924951828989685 0.01821177573840633 \n",
+ "9 30 60 \n",
+ "10 sqrt sqrt \n",
+ "11 2 2 \n",
+ "12 2 5 \n",
+ "13 30 60 \n",
+ "14 True True "
+ ]
+ },
+ "execution_count": 75,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "id": "299b5a6a-bec3-4a29-8af4-9ddf14ce8aa8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Score rows whose values are rounded to three decimals for the final table.\n",
+ "metric_attributes = ['r2_train', 'r2_test', 'oob_score', 'nse_test', 'pbias_test']\n",
+ "is_metric_row = results['Attribute'].isin(metric_attributes)\n",
+ "\n",
+ "for version in versions:\n",
+ "    column = version.upper()  # value columns are named after the upper-cased version\n",
+ "    # Assign through a single .loc call: the chained form results[col].iloc[i] = ...\n",
+ "    # writes to a temporary object and is not guaranteed to update `results`.\n",
+ "    results.loc[is_metric_row, column] = [\n",
+ "        round(float(value), 3) for value in results.loc[is_metric_row, column]\n",
+ "    ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "id": "9aa781e8-0c28-429f-9dbd-aafaefd04c61",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Attribute | \n",
+ " TN_MODEL_V1 | \n",
+ " TN_MODEL_V2 | \n",
+ " TN_MODEL_V3 | \n",
+ " TN_MODEL_V4 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " n_features | \n",
+ " 62 | \n",
+ " 55 | \n",
+ " 47 | \n",
+ " 38 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " test_size | \n",
+ " 0.3 | \n",
+ " 0.3 | \n",
+ " 0.3 | \n",
+ " 0.3 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " n_samples_train | \n",
+ " 328 | \n",
+ " 328 | \n",
+ " 328 | \n",
+ " 328 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " n_samples_test | \n",
+ " 141 | \n",
+ " 141 | \n",
+ " 141 | \n",
+ " 141 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " r2_train | \n",
+ " 0.877 | \n",
+ " 0.937 | \n",
+ " 0.917 | \n",
+ " 0.928 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " r2_test | \n",
+ " 0.791 | \n",
+ " 0.835 | \n",
+ " 0.821 | \n",
+ " 0.833 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " oob_score | \n",
+ " 0.701 | \n",
+ " 0.727 | \n",
+ " 0.714 | \n",
+ " 0.738 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " nse_test | \n",
+ " 0.791 | \n",
+ " 0.835 | \n",
+ " 0.821 | \n",
+ " 0.833 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " pbias_test | \n",
+ " -0.005 | \n",
+ " 0.035 | \n",
+ " 0.019 | \n",
+ " 0.018 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " max_depth | \n",
+ " 90 | \n",
+ " 30 | \n",
+ " 30 | \n",
+ " 60 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " max_features | \n",
+ " sqrt | \n",
+ " sqrt | \n",
+ " sqrt | \n",
+ " sqrt | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " min_samples_leaf | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " min_samples_split | \n",
+ " 5 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " n_estimators | \n",
+ " 30 | \n",
+ " 30 | \n",
+ " 30 | \n",
+ " 60 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " bootstrap | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Attribute TN_MODEL_V1 TN_MODEL_V2 TN_MODEL_V3 TN_MODEL_V4\n",
+ "0 n_features 62 55 47 38\n",
+ "1 test_size 0.3 0.3 0.3 0.3\n",
+ "2 n_samples_train 328 328 328 328\n",
+ "3 n_samples_test 141 141 141 141\n",
+ "4 r2_train 0.877 0.937 0.917 0.928\n",
+ "5 r2_test 0.791 0.835 0.821 0.833\n",
+ "6 oob_score 0.701 0.727 0.714 0.738\n",
+ "7 nse_test 0.791 0.835 0.821 0.833\n",
+ "8 pbias_test -0.005 0.035 0.019 0.018\n",
+ "9 max_depth 90 30 30 60\n",
+ "10 max_features sqrt sqrt sqrt sqrt\n",
+ "11 min_samples_leaf 4 2 2 2\n",
+ "12 min_samples_split 5 2 2 5\n",
+ "13 n_estimators 30 30 30 60\n",
+ "14 bootstrap True True True True"
+ ]
+ },
+ "execution_count": 77,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "id": "31255d10-61b4-4c4b-adef-b6e4176fd58e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Leave these two rows out of the exported table. Selecting them by name rather\n",
+ "# than by position keeps the cell correct if the row order changes and makes\n",
+ "# re-running it a no-op instead of dropping two further rows.\n",
+ "excluded_attributes = ['nse_test', 'pbias_test']\n",
+ "is_excluded = results['Attribute'].isin(excluded_attributes)\n",
+ "results = results[~is_excluded].reset_index(drop=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "id": "a8d89d9c-e265-47e7-bd81-0a5cf7d429fc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Destination file and cross-reference label both carry the parameter code.\n",
+ "output_path = f'D:/est_water_qual/model/{param}_results.tex'\n",
+ "table_label = f'table:{param}_results'\n",
+ "\n",
+ "caption = f'''Results of the four model versions used for {param.upper()} prediction along with hyperparameters derived from the RandomizedSearchCV \n",
+ "algorithm.'''\n",
+ "\n",
+ "# Export as a LaTeX longtable without the integer row index.\n",
+ "results.to_latex(\n",
+ "    output_path,\n",
+ "    index=False,\n",
+ "    longtable=True,\n",
+ "    caption=caption,\n",
+ "    label=table_label,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "id": "5d5e4461-956b-4a9a-833f-3fb91a3aef14",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ "