Skip to content

Commit 57084e9

Browse files
committed
plot cv results
1 parent f3c1d38 commit 57084e9

File tree

5 files changed

+119
-190
lines changed

5 files changed

+119
-190
lines changed

hydromodel/datasets/data_postprocess.py

Lines changed: 5 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
"""Show results of calibration and validation."""
2+
23
import os
34
from matplotlib import pyplot as plt
45
import numpy as np
56
import pandas as pd
6-
import spotpy
77

88
from hydroutils import hydro_file, hydro_stat
99

@@ -54,18 +54,15 @@ def plot_train_iteration(likelihood, save_fig):
5454
plt.close()
5555

5656

57-
def show_sceua_cali_result(
58-
sceua_calibrated_file,
57+
def show_events_result(
5958
warmup_length,
6059
save_dir,
61-
basin_id,
6260
train_period,
63-
result_unit="mm/hour",
6461
basin_area=None,
6562
prcp=None,
6663
):
6764
"""
68-
Plot all year result to see the effect of optimized parameters
65+
Plot all events result to see the effect of optimized parameters
6966
7067
Parameters
7168
----------
@@ -84,65 +81,7 @@ def show_sceua_cali_result(
8481
-------
8582
None
8683
"""
87-
# Load the results gained with the sceua sampler, stored in SCEUA_xaj.csv
88-
# results = []
89-
# for chunk in pd.read_csv(sceua_calibrated_file, chunksize=100000 ):
90-
# results.append(chunk)
91-
# results = pd.concat(results)
92-
results = spotpy.analyser.load_csv_results(sceua_calibrated_file) # 读取结果
93-
# Plot how the objective function was minimized during sampling
94-
if not os.path.exists(save_dir): # 绘制采样过程中目标函数的最小化情况
95-
os.makedirs(save_dir)
96-
plot_train_iteration(
97-
results["like1"],
98-
os.path.join(save_dir, "train_iteration.png"), # 绘制迭代中的RMSE
99-
)
100-
# Plot the best model run
101-
# Find the run_id with the minimal objective function value
102-
bestindex, bestobjf = spotpy.analyser.get_minlikeindex(
103-
results
104-
) # 绘制最佳模型图并找到run—id
105-
# Select best model run
106-
best_model_run = results[bestindex] # 选择最佳模型结果
107-
# Filter results for simulation results #最佳模型模拟结果
108-
fields = [word for word in best_model_run.dtype.names if word.startswith("sim")]
109-
best_simulation = list(best_model_run[fields])
110-
convert_unit_sim = units.convert_unit(
111-
np.array(best_simulation).reshape(1, -1),
112-
# np.array(list(map(float, best_simulation)), dtype=float).reshape(1, -1),
113-
result_unit,
114-
units.unit["streamflow"],
115-
basin_area=basin_area,
116-
)
117-
convert_unit_obs = units.convert_unit(
118-
np.array(spot_setup.evaluation()).reshape(1, -1),
119-
result_unit,
120-
units.unit["streamflow"],
121-
basin_area=basin_area,
122-
)
123-
124-
# save calibrated results of calibration period #保存率定的结果
125-
train_result_file = os.path.join(
126-
save_dir,
127-
"train_qsim_" + spot_setup.model["name"] + "_" + str(basin_id) + ".csv",
128-
)
129-
pd.DataFrame(convert_unit_sim.reshape(-1, 1)).to_csv(
130-
train_result_file,
131-
sep=",",
132-
index=False,
133-
header=False,
134-
)
135-
# calculation rmse、nashsutcliffe and bias for training period
136-
stat_error = hydro_stat.stat_error(
137-
convert_unit_obs,
138-
convert_unit_sim,
139-
)
140-
print("Training Metrics:", basin_id, stat_error)
141-
hydro_file.serialize_json_np(
142-
stat_error, os.path.join(save_dir, "train_metrics.json")
143-
)
144-
145-
# 循还画图
84+
# TODO: not finished
14685
time = pd.read_excel(
14786
"D:/研究生/毕业论文/new毕业论文/预答辩/碧流河水库/站点信息/洪水率定时间.xlsx"
14887
)
@@ -253,7 +192,7 @@ def show_sceua_cali_result(
253192
plot_sim_and_obs(t_range_train, best_simulation, obs, prcp[:], save_fig)
254193

255194

256-
def show_test_result(basin_id, test_date, qsim, obs, save_dir):
195+
def show_ts_result(basin_id, test_date, qsim, obs, save_dir):
257196
stat_error = hydro_stat.stat_error(obs.reshape(1, -1), qsim.reshape(1, -1))
258197
print("Test Metrics:", basin_id, stat_error)
259198
hydro_file.serialize_json_np(
@@ -371,7 +310,3 @@ def show_test_result(basin_id, test_date, qsim, obs, save_dir):
371310
prcp[:],
372311
save_fig,
373312
)
374-
# summarize_parameters(one_model_one_hyperparam_setting_dir, {"name": "xaj_mz"})
375-
# renormalize_params(one_model_one_hyperparam_setting_dir, {"name":"xaj_mz"})
376-
# summarize_metrics(one_model_one_hyperparam_setting_dir,{"name":"xaj_mz"})
377-
# save_streamflow(one_model_one_hyperparam_setting_dir,{"name":"xaj_mz"})

hydromodel/trainers/evaluate.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Author: Wenyu Ouyang
33
Date: 2022-10-25 21:16:22
4-
LastEditTime: 2024-03-27 10:42:57
4+
LastEditTime: 2024-03-27 11:18:09
55
LastEditors: Wenyu Ouyang
66
Description: Plots for calibration and testing results
77
FilePath: \hydro-model-xaj\hydromodel\trainers\evaluate.py
@@ -197,9 +197,13 @@ def _summarize_metrics(self, basin_ids):
197197
model_name = self.model_info["name"]
198198
file_path = os.path.join(result_dir, f"{model_name}_evaluation_results.nc")
199199
ds = xr.open_dataset(file_path)
200+
# for metrics, warmup_length should be considered
201+
warmup_length = self.config["warmup"]
202+
qobs = ds["qobs"].transpose("basin", "time").to_numpy()[:, warmup_length:]
203+
qsim = ds["qsim"].transpose("basin", "time").to_numpy()[:, warmup_length:]
200204
test_metrics = hydro_stat.stat_error(
201-
ds["qobs"].transpose("basin", "time").to_numpy(),
202-
ds["qsim"].transpose("basin", "time").to_numpy(),
205+
qobs,
206+
qsim,
203207
)
204208
metric_dfs_test = pd.DataFrame(test_metrics, index=basin_ids)
205209
metric_file_test = os.path.join(result_dir, "basins_metrics.csv")
@@ -224,6 +228,12 @@ def _save_evaluate_results(self, qsim, qobs, obs_ds):
224228

225229
print(f"Results saved to: {file_path}")
226230

231+
def load_results(self):
232+
result_dir = self.save_dir
233+
model_name = self.model_info["name"]
234+
file_path = os.path.join(result_dir, f"{model_name}_evaluation_results.nc")
235+
return xr.open_dataset(file_path)
236+
227237

228238
def _read_save_sceua_calibrated_params(basin_id, save_dir, sceua_calibrated_file_name):
229239
"""

scripts/post_process.py

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Author: Wenyu Ouyang
33
Date: 2022-11-19 17:27:05
4-
LastEditTime: 2024-03-27 08:37:58
4+
LastEditTime: 2024-03-27 11:23:02
55
LastEditors: Wenyu Ouyang
66
Description: the script to postprocess results
77
FilePath: \hydro-model-xaj\scripts\post_process.py
@@ -15,20 +15,47 @@
1515

1616
repo_dir = os.path.dirname(Path(os.path.abspath(__file__)).parent)
1717
sys.path.append(repo_dir)
18-
from hydromodel.trainers.evaluate import read_yaml_config
18+
from hydromodel.datasets.data_postprocess import plot_sim_and_obs
19+
from hydromodel.trainers.evaluate import Evaluator, read_yaml_config
1920

2021

21-
def statistics(args):
22+
def visualize(args):
2223
exp = args.exp
2324
cali_dir = Path(os.path.join(repo_dir, "result", exp))
2425
cali_config = read_yaml_config(os.path.join(cali_dir, "config.yaml"))
25-
if os.path.exists(cali_dir) is False:
26-
raise NotImplementedError(
27-
"You should run prepare_data and calibrate scripts at first."
28-
)
29-
print(
30-
"Compare evaluation results of different calibrated models in an exp directory"
31-
)
26+
kfold = cali_config["cv_fold"]
27+
basins = cali_config["basin_id"]
28+
warmup = cali_config["warmup"]
29+
for fold in range(kfold):
30+
print(f"Start to evaluate the {fold+1}-th fold")
31+
fold_dir = os.path.join(cali_dir, f"sceua_xaj_cv{fold+1}")
32+
# evaluate both train and test period for all basins
33+
eval_train_dir = os.path.join(fold_dir, "train")
34+
eval_test_dir = os.path.join(fold_dir, "test")
35+
train_eval = Evaluator(cali_dir, fold_dir, eval_train_dir)
36+
test_eval = Evaluator(cali_dir, fold_dir, eval_test_dir)
37+
ds_train = train_eval.load_results()
38+
ds_test = test_eval.load_results()
39+
for basin in basins:
40+
save_fig_train = os.path.join(eval_train_dir, f"train_sim_obs_{basin}.png")
41+
plot_sim_and_obs(
42+
ds_train["time"].isel(time=slice(warmup, None)),
43+
ds_train["qsim"].sel(basin=basin).isel(time=slice(warmup, None)),
44+
ds_train["qobs"].sel(basin=basin).isel(time=slice(warmup, None)),
45+
save_fig_train,
46+
xlabel="Date",
47+
ylabel=None,
48+
)
49+
save_fig_test = os.path.join(eval_test_dir, f"test_sim_obs_{basin}.png")
50+
plot_sim_and_obs(
51+
ds_test["time"].isel(time=slice(warmup, None)),
52+
ds_test["qsim"].sel(basin=basin).isel(time=slice(warmup, None)),
53+
ds_test["qobs"].sel(basin=basin).isel(time=slice(warmup, None)),
54+
save_fig_test,
55+
xlabel="Date",
56+
ylabel=None,
57+
)
58+
print(f"Finish visualizing the {fold}-th fold")
3259

3360

3461
if __name__ == "__main__":
@@ -43,4 +70,4 @@ def statistics(args):
4370
type=str,
4471
)
4572
the_args = parser.parse_args()
46-
statistics(the_args)
73+
visualize(the_args)

test/test_data_postprocess.py

Lines changed: 63 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
11
"""
22
Author: Wenyu Ouyang
33
Date: 2022-10-25 21:16:22
4-
LastEditTime: 2024-03-26 17:01:09
4+
LastEditTime: 2024-03-27 10:59:29
55
LastEditors: Wenyu Ouyang
66
Description: Test for data preprocess
77
FilePath: \hydro-model-xaj\test\test_data_postprocess.py
88
Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved.
99
"""
1010

11-
import os
12-
import pandas as pd
13-
import numpy as np
1411
import matplotlib.pyplot as plt
1512
import spotpy
1613
from spotpy.examples.spot_setup_hymod_python import spot_setup as hymod_setup
17-
from trainers.evaluate import _read_save_sceua_calibrated_params
14+
15+
from hydroutils import hydro_time
16+
17+
from hydromodel.datasets.data_postprocess import show_events_result, show_ts_result
18+
from hydromodel.models.xaj import xaj
19+
from hydromodel.trainers.calibrate_sceua import calibrate_by_sceua
20+
from hydromodel.trainers.evaluate import _read_save_sceua_calibrated_params
1821

1922

2023
def test_run_hymod_calibration():
@@ -62,35 +65,63 @@ def test_run_hymod_calibration():
6265
plt.show()
6366

6467

65-
def test_read_save_sceua_calibrated_params(tmpdir):
66-
# Create a temporary directory for testing
67-
temp_dir = tmpdir.mkdir("test_data")
68-
69-
# Generate some dummy data
70-
results = np.array(
71-
[(1, 2, 3), (4, 5, 6), (7, 8, 9)],
72-
dtype=[("par1", int), ("par2", int), ("par3", int)],
68+
def test_show_calibrate_sceua_result(p_and_e, qobs, warmup_length, db_name, basin_area):
69+
sampler = calibrate_by_sceua(
70+
p_and_e,
71+
qobs,
72+
db_name,
73+
warmup_length,
74+
model={
75+
"name": "xaj_mz",
76+
"source_type": "sources",
77+
"source_book": "HF",
78+
},
79+
algorithm={
80+
"name": "SCE_UA",
81+
"random_seed": 1234,
82+
"rep": 5,
83+
"ngs": 7,
84+
"kstop": 3,
85+
"peps": 0.1,
86+
"pcento": 0.1,
87+
},
7388
)
74-
spotpy.analyser.load_csv_results = lambda _: results
75-
spotpy.analyser.get_minlikeindex = lambda _: (0, 0)
76-
77-
# Call the function
78-
basin_id = "test_basin"
79-
save_dir = temp_dir
80-
sceua_calibrated_file_name = "test_results.csv"
81-
result = _read_save_sceua_calibrated_params(
82-
basin_id, save_dir, sceua_calibrated_file_name
89+
train_period = hydro_time.t_range_days(["2012-01-01", "2017-01-01"])
90+
show_events_result(
91+
sampler.setup,
92+
db_name,
93+
warmup_length=warmup_length,
94+
save_dir=db_name,
95+
basin_id="basin_id",
96+
train_period=train_period,
97+
basin_area=basin_area,
8398
)
8499

85-
# Check if the file is saved correctly
86-
expected_file_path = os.path.join(save_dir, basin_id + "_calibrate_params.txt")
87-
assert os.path.exists(expected_file_path)
88100

89-
# Check if the saved file contains the expected data
90-
expected_data = pd.DataFrame([(1, 2, 3)], columns=["par1", "par2", "par3"])
91-
saved_data = pd.read_csv(expected_file_path)
92-
pd.testing.assert_frame_equal(saved_data, expected_data)
101+
def test_show_test_result(p_and_e, qobs, warmup_length, db_name, basin_area):
102+
params = _read_save_sceua_calibrated_params("basin_id", db_name, db_name)
103+
qsim, _ = xaj(
104+
p_and_e,
105+
params,
106+
warmup_length=warmup_length,
107+
name="xaj_mz",
108+
source_type="sources",
109+
source_book="HF",
110+
)
93111

94-
# Check if the returned result is correct
95-
expected_result = np.array([(1, 2, 3)])
96-
np.testing.assert_array_equal(result, expected_result)
112+
qsim = units.convert_unit(
113+
qsim,
114+
unit_now="mm/day",
115+
unit_final=units.unit["streamflow"],
116+
basin_area=basin_area,
117+
)
118+
qobs = units.convert_unit(
119+
qobs[warmup_length:, :, :],
120+
unit_now="mm/day",
121+
unit_final=units.unit["streamflow"],
122+
basin_area=basin_area,
123+
)
124+
test_period = hydro_time.t_range_days(["2012-01-01", "2017-01-01"])
125+
show_ts_result(
126+
"basin_id", test_period[warmup_length:], qsim, qobs, save_dir=db_name
127+
)

0 commit comments

Comments
 (0)