plot cv results

OuyangWenyu · OuyangWenyu · commit 57084e95afdf · 2024-03-27T11:28:08.000+08:00
diff --git a/hydromodel/datasets/data_postprocess.py b/hydromodel/datasets/data_postprocess.py
@@ -1,9 +1,9 @@
 """Show results of calibration and validation."""
+
 import os
 from matplotlib import pyplot as plt
 import numpy as np
 import pandas as pd
-import spotpy
 
 from hydroutils import hydro_file, hydro_stat
 
@@ -54,18 +54,15 @@ def plot_train_iteration(likelihood, save_fig):
     plt.close()
 
 
-def show_sceua_cali_result(
-    sceua_calibrated_file,
+def show_events_result(
     warmup_length,
     save_dir,
-    basin_id,
     train_period,
-    result_unit="mm/hour",
     basin_area=None,
     prcp=None,
 ):
     """
-    Plot all year result to see the effect of optimized parameters
+    Plot all events result to see the effect of optimized parameters
 
     Parameters
     ----------
@@ -84,65 +81,7 @@ def show_sceua_cali_result(
     -------
     None
     """
-    # Load the results gained with the sceua sampler, stored in SCEUA_xaj.csv
-    # results = []
-    # for chunk in pd.read_csv(sceua_calibrated_file, chunksize=100000 ):
-    #  results.append(chunk)
-    # results = pd.concat(results)
-    results = spotpy.analyser.load_csv_results(sceua_calibrated_file)  # 读取结果
-    # Plot how the objective function was minimized during sampling
-    if not os.path.exists(save_dir):  # 绘制采样过程中目标函数的最小化情况
-        os.makedirs(save_dir)
-    plot_train_iteration(
-        results["like1"],
-        os.path.join(save_dir, "train_iteration.png"),  # 绘制迭代中的RMSE
-    )
-    # Plot the best model run
-    # Find the run_id with the minimal objective function value
-    bestindex, bestobjf = spotpy.analyser.get_minlikeindex(
-        results
-    )  # 绘制最佳模型图并找到run—id
-    # Select best model run
-    best_model_run = results[bestindex]  # 选择最佳模型结果
-    # Filter results for simulation results #最佳模型模拟结果
-    fields = [word for word in best_model_run.dtype.names if word.startswith("sim")]
-    best_simulation = list(best_model_run[fields])
-    convert_unit_sim = units.convert_unit(
-        np.array(best_simulation).reshape(1, -1),
-        # np.array(list(map(float, best_simulation)), dtype=float).reshape(1, -1),
-        result_unit,
-        units.unit["streamflow"],
-        basin_area=basin_area,
-    )
-    convert_unit_obs = units.convert_unit(
-        np.array(spot_setup.evaluation()).reshape(1, -1),
-        result_unit,
-        units.unit["streamflow"],
-        basin_area=basin_area,
-    )
-
-    # save calibrated results of calibration period      #保存率定的结果
-    train_result_file = os.path.join(
-        save_dir,
-        "train_qsim_" + spot_setup.model["name"] + "_" + str(basin_id) + ".csv",
-    )
-    pd.DataFrame(convert_unit_sim.reshape(-1, 1)).to_csv(
-        train_result_file,
-        sep=",",
-        index=False,
-        header=False,
-    )
-    # calculation rmse、nashsutcliffe and bias for training period
-    stat_error = hydro_stat.stat_error(
-        convert_unit_obs,
-        convert_unit_sim,
-    )
-    print("Training Metrics:", basin_id, stat_error)
-    hydro_file.serialize_json_np(
-        stat_error, os.path.join(save_dir, "train_metrics.json")
-    )
-
-    # 循还画图
+    # TODO: not finished
     time = pd.read_excel(
         "D:/研究生/毕业论文/new毕业论文/预答辩/碧流河水库/站点信息/洪水率定时间.xlsx"
     )
@@ -253,7 +192,7 @@ def show_sceua_cali_result(
     plot_sim_and_obs(t_range_train, best_simulation, obs, prcp[:], save_fig)
 
 
-def show_test_result(basin_id, test_date, qsim, obs, save_dir):
+def show_ts_result(basin_id, test_date, qsim, obs, save_dir):
     stat_error = hydro_stat.stat_error(obs.reshape(1, -1), qsim.reshape(1, -1))
     print("Test Metrics:", basin_id, stat_error)
     hydro_file.serialize_json_np(
@@ -371,7 +310,3 @@ def show_test_result(basin_id, test_date, qsim, obs, save_dir):
         prcp[:],
         save_fig,
     )
-    # summarize_parameters(one_model_one_hyperparam_setting_dir, {"name": "xaj_mz"})
-    # renormalize_params(one_model_one_hyperparam_setting_dir, {"name":"xaj_mz"})
-    # summarize_metrics(one_model_one_hyperparam_setting_dir,{"name":"xaj_mz"})
-    # save_streamflow(one_model_one_hyperparam_setting_dir,{"name":"xaj_mz"})
diff --git a/hydromodel/trainers/evaluate.py b/hydromodel/trainers/evaluate.py
@@ -1,7 +1,7 @@
 """
 Author: Wenyu Ouyang
 Date: 2022-10-25 21:16:22
-LastEditTime: 2024-03-27 10:42:57
+LastEditTime: 2024-03-27 11:18:09
 LastEditors: Wenyu Ouyang
 Description: Plots for calibration and testing results
 FilePath: \hydro-model-xaj\hydromodel\trainers\evaluate.py
@@ -197,9 +197,13 @@ def _summarize_metrics(self, basin_ids):
         model_name = self.model_info["name"]
         file_path = os.path.join(result_dir, f"{model_name}_evaluation_results.nc")
         ds = xr.open_dataset(file_path)
+        # for metrics, warmup_length should be considered
+        warmup_length = self.config["warmup"]
+        qobs = ds["qobs"].transpose("basin", "time").to_numpy()[:, warmup_length:]
+        qsim = ds["qsim"].transpose("basin", "time").to_numpy()[:, warmup_length:]
         test_metrics = hydro_stat.stat_error(
-            ds["qobs"].transpose("basin", "time").to_numpy(),
-            ds["qsim"].transpose("basin", "time").to_numpy(),
+            qobs,
+            qsim,
         )
         metric_dfs_test = pd.DataFrame(test_metrics, index=basin_ids)
         metric_file_test = os.path.join(result_dir, "basins_metrics.csv")
@@ -224,6 +228,12 @@ def _save_evaluate_results(self, qsim, qobs, obs_ds):
 
         print(f"Results saved to: {file_path}")
 
+    def load_results(self):
+        result_dir = self.save_dir
+        model_name = self.model_info["name"]
+        file_path = os.path.join(result_dir, f"{model_name}_evaluation_results.nc")
+        return xr.open_dataset(file_path)
+
 
 def _read_save_sceua_calibrated_params(basin_id, save_dir, sceua_calibrated_file_name):
     """
diff --git a/scripts/post_process.py b/scripts/post_process.py
@@ -1,7 +1,7 @@
 """
 Author: Wenyu Ouyang
 Date: 2022-11-19 17:27:05
-LastEditTime: 2024-03-27 08:37:58
+LastEditTime: 2024-03-27 11:23:02
 LastEditors: Wenyu Ouyang
 Description: the script to postprocess results
 FilePath: \hydro-model-xaj\scripts\post_process.py
@@ -15,20 +15,47 @@
 
 repo_dir = os.path.dirname(Path(os.path.abspath(__file__)).parent)
 sys.path.append(repo_dir)
-from hydromodel.trainers.evaluate import read_yaml_config
+from hydromodel.datasets.data_postprocess import plot_sim_and_obs
+from hydromodel.trainers.evaluate import Evaluator, read_yaml_config
 
 
-def statistics(args):
+def visualize(args):
     exp = args.exp
     cali_dir = Path(os.path.join(repo_dir, "result", exp))
     cali_config = read_yaml_config(os.path.join(cali_dir, "config.yaml"))
-    if os.path.exists(cali_dir) is False:
-        raise NotImplementedError(
-            "You should run prepare_data and calibrate scripts at first."
-        )
-    print(
-        "Compare evaluation results of different calibrated models in an exp directory"
-    )
+    kfold = cali_config["cv_fold"]
+    basins = cali_config["basin_id"]
+    warmup = cali_config["warmup"]
+    for fold in range(kfold):
+        print(f"Start to evaluate the {fold+1}-th fold")
+        fold_dir = os.path.join(cali_dir, f"sceua_xaj_cv{fold+1}")
+        # evaluate both train and test period for all basins
+        eval_train_dir = os.path.join(fold_dir, "train")
+        eval_test_dir = os.path.join(fold_dir, "test")
+        train_eval = Evaluator(cali_dir, fold_dir, eval_train_dir)
+        test_eval = Evaluator(cali_dir, fold_dir, eval_test_dir)
+        ds_train = train_eval.load_results()
+        ds_test = test_eval.load_results()
+        for basin in basins:
+            save_fig_train = os.path.join(eval_train_dir, f"train_sim_obs_{basin}.png")
+            plot_sim_and_obs(
+                ds_train["time"].isel(time=slice(warmup, None)),
+                ds_train["qsim"].sel(basin=basin).isel(time=slice(warmup, None)),
+                ds_train["qobs"].sel(basin=basin).isel(time=slice(warmup, None)),
+                save_fig_train,
+                xlabel="Date",
+                ylabel=None,
+            )
+            save_fig_test = os.path.join(eval_test_dir, f"test_sim_obs_{basin}.png")
+            plot_sim_and_obs(
+                ds_test["time"].isel(time=slice(warmup, None)),
+                ds_test["qsim"].sel(basin=basin).isel(time=slice(warmup, None)),
+                ds_test["qobs"].sel(basin=basin).isel(time=slice(warmup, None)),
+                save_fig_test,
+                xlabel="Date",
+                ylabel=None,
+            )
+        print(f"Finish visualizing the {fold}-th fold")
 
 
 if __name__ == "__main__":
@@ -43,4 +70,4 @@ def statistics(args):
         type=str,
     )
     the_args = parser.parse_args()
-    statistics(the_args)
+    visualize(the_args)
diff --git a/test/test_data_postprocess.py b/test/test_data_postprocess.py
@@ -1,20 +1,23 @@
 """
 Author: Wenyu Ouyang
 Date: 2022-10-25 21:16:22
-LastEditTime: 2024-03-26 17:01:09
+LastEditTime: 2024-03-27 10:59:29
 LastEditors: Wenyu Ouyang
 Description: Test for data preprocess
 FilePath: \hydro-model-xaj\test\test_data_postprocess.py
 Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved.
 """
 
-import os
-import pandas as pd
-import numpy as np
 import matplotlib.pyplot as plt
 import spotpy
 from spotpy.examples.spot_setup_hymod_python import spot_setup as hymod_setup
-from trainers.evaluate import _read_save_sceua_calibrated_params
+
+from hydroutils import hydro_time
+
+from hydromodel.datasets.data_postprocess import show_events_result, show_ts_result
+from hydromodel.models.xaj import xaj
+from hydromodel.trainers.calibrate_sceua import calibrate_by_sceua
+from hydromodel.trainers.evaluate import _read_save_sceua_calibrated_params
 
 
 def test_run_hymod_calibration():
@@ -62,35 +65,63 @@ def test_run_hymod_calibration():
     plt.show()
 
 
-def test_read_save_sceua_calibrated_params(tmpdir):
-    # Create a temporary directory for testing
-    temp_dir = tmpdir.mkdir("test_data")
-
-    # Generate some dummy data
-    results = np.array(
-        [(1, 2, 3), (4, 5, 6), (7, 8, 9)],
-        dtype=[("par1", int), ("par2", int), ("par3", int)],
+def test_show_calibrate_sceua_result(p_and_e, qobs, warmup_length, db_name, basin_area):
+    sampler = calibrate_by_sceua(
+        p_and_e,
+        qobs,
+        db_name,
+        warmup_length,
+        model={
+            "name": "xaj_mz",
+            "source_type": "sources",
+            "source_book": "HF",
+        },
+        algorithm={
+            "name": "SCE_UA",
+            "random_seed": 1234,
+            "rep": 5,
+            "ngs": 7,
+            "kstop": 3,
+            "peps": 0.1,
+            "pcento": 0.1,
+        },
     )
-    spotpy.analyser.load_csv_results = lambda _: results
-    spotpy.analyser.get_minlikeindex = lambda _: (0, 0)
-
-    # Call the function
-    basin_id = "test_basin"
-    save_dir = temp_dir
-    sceua_calibrated_file_name = "test_results.csv"
-    result = _read_save_sceua_calibrated_params(
-        basin_id, save_dir, sceua_calibrated_file_name
+    train_period = hydro_time.t_range_days(["2012-01-01", "2017-01-01"])
+    show_events_result(
+        sampler.setup,
+        db_name,
+        warmup_length=warmup_length,
+        save_dir=db_name,
+        basin_id="basin_id",
+        train_period=train_period,
+        basin_area=basin_area,
     )
 
-    # Check if the file is saved correctly
-    expected_file_path = os.path.join(save_dir, basin_id + "_calibrate_params.txt")
-    assert os.path.exists(expected_file_path)
 
-    # Check if the saved file contains the expected data
-    expected_data = pd.DataFrame([(1, 2, 3)], columns=["par1", "par2", "par3"])
-    saved_data = pd.read_csv(expected_file_path)
-    pd.testing.assert_frame_equal(saved_data, expected_data)
+def test_show_test_result(p_and_e, qobs, warmup_length, db_name, basin_area):
+    params = _read_save_sceua_calibrated_params("basin_id", db_name, db_name)
+    qsim, _ = xaj(
+        p_and_e,
+        params,
+        warmup_length=warmup_length,
+        name="xaj_mz",
+        source_type="sources",
+        source_book="HF",
+    )
 
-    # Check if the returned result is correct
-    expected_result = np.array([(1, 2, 3)])
-    np.testing.assert_array_equal(result, expected_result)
+    qsim = units.convert_unit(
+        qsim,
+        unit_now="mm/day",
+        unit_final=units.unit["streamflow"],
+        basin_area=basin_area,
+    )
+    qobs = units.convert_unit(
+        qobs[warmup_length:, :, :],
+        unit_now="mm/day",
+        unit_final=units.unit["streamflow"],
+        basin_area=basin_area,
+    )
+    test_period = hydro_time.t_range_days(["2012-01-01", "2017-01-01"])
+    show_ts_result(
+        "basin_id", test_period[warmup_length:], qsim, qobs, save_dir=db_name
+    )
diff --git a/test/test_show_results.py b/test/test_show_results.py