From 5ba87c24444a93d5617cc6445435c3791945be6f Mon Sep 17 00:00:00 2001
From: LenkaNovak
Date: Mon, 29 Jan 2024 08:13:09 -0800
Subject: [PATCH] fix initial file read

fix bc, clarify test

fix bc, clarify test

fix comments

rev + split init cases

rev2

rev3
---
 src/BCReader.jl                      | 100 ++++++++++++-------
 test/bcreader_tests.jl               | 131 +++++++++++++++++--------
 test/mpi_tests/bcreader_mpi_tests.jl | 140 +++++++++++++++++++--------
 3 files changed, 252 insertions(+), 119 deletions(-)

diff --git a/src/BCReader.jl b/src/BCReader.jl
index 92a81fbed9..e62c1bb4eb 100644
--- a/src/BCReader.jl
+++ b/src/BCReader.jl
@@ -95,7 +95,7 @@ and returns the info packaged in a single struct.
 - `boundary_space`: [Spaces.AbstractSpace] the space to which we are mapping.
 - `comms_ctx`: [ClimaComms.AbstractCommsContext] context used for this operation.
 - `interpolate_daily`: [Bool] switch to trigger daily interpolation.
-- `segment_idx0`: [Vector{Int}] index of the file data that is closest to date0.
+- `segment_idx0`: [Vector{Int}] reference index which, after initialization, points to the first file date index used minus 1 (`segment_idx[1] - 1`).
 - `scaling function`: [Function] scales, offsets or transforms `varname`.
 - `land_fraction`: [Fields.field] fraction with 1 = land, 0 = ocean / sea-ice.
 - `date0`: [Dates.DateTime] start date of the file data.
@@ -182,22 +182,54 @@ The times for which data is extracted depends on the specifications in the
 function update_midmonth_data!(date, bcf_info::BCFileInfo{FT}) where {FT}
     # monthly count
     (; bcfile_dir, comms_ctx, hd_outfile_root, varname, all_dates, scaling_function) = bcf_info
-    midmonth_idx = bcf_info.segment_idx[1]
-    midmonth_idx0 = bcf_info.segment_idx0[1]
-
-    if (midmonth_idx == midmonth_idx0) && (Dates.days(date - all_dates[midmonth_idx]) < 0) # for init
-        midmonth_idx = bcf_info.segment_idx[1] -= Int(1)
-        midmonth_idx = midmonth_idx < Int(1) ? midmonth_idx + Int(1) : midmonth_idx
-        @warn "this time period is before BC data - using file from $(all_dates[midmonth_idx0])"
-        bcf_info.monthly_fields[1] .= scaling_function(
-            Regridder.read_from_hdf5(bcfile_dir, hd_outfile_root, all_dates[Int(midmonth_idx0)], varname, comms_ctx),
-            bcf_info,
-        )
-        bcf_info.monthly_fields[2] .= deepcopy(bcf_info.monthly_fields[1])
-        bcf_info.segment_length .= Int(0)
-
-    elseif Dates.days(date - all_dates[end - 1]) > 0 # for fini
-        @warn "this time period is after BC data - using file from $(all_dates[end - 1])"
+    segment_idx = bcf_info.segment_idx[1] # index of the current segment's start date in the file; [segment_idx, segment_idx + 1] brackets the dates we interpolate between
+    segment_idx0 = bcf_info.segment_idx0[1] # reference index (first segment_idx - 1)
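+    # e.g., with monthly file dates, if the current segment spans all_dates[5] to all_dates[6],
+    # then segment_idx == 5 and, after initialization, segment_idx0 == 4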
+
+    # upon initialization (segment_idx == segment_idx0) with model date before the final file date
+    if (segment_idx == segment_idx0) && !((date - all_dates[end]).value >= 0)
+        # case 1: model date is before the first segment read from the file
+        if (date - all_dates[segment_idx0]).value < 0
+            @warn "This time period is before BC data - using file from $(all_dates[segment_idx0])"
+            bcf_info.monthly_fields[1] .= scaling_function(
+                Regridder.read_from_hdf5(bcfile_dir, hd_outfile_root, all_dates[Int(segment_idx0)], varname, comms_ctx),
+                bcf_info,
+            )
+            bcf_info.monthly_fields[2] .= deepcopy(bcf_info.monthly_fields[1])
+            bcf_info.segment_length .= Int(0)
+            bcf_info.segment_idx[1] -= Int(1)
+            bcf_info.segment_idx0[1] -= Int(2)
+
+        # case 2: model date is at or after the end of the first segment read from the file
+        elseif (date - all_dates[Int(segment_idx0) + 1]).value >= 0
+            nearest_idx = argmin(
+                abs.(
+                    parse(FT, TimeManager.datetime_to_strdate(date)) .-
+                    parse.(FT, TimeManager.datetime_to_strdate.(all_dates[:]))
+                ),
+            )
+            @warn "Initializing with `segment_idx = $nearest_idx`"
+            bcf_info.segment_idx[1] = nearest_idx
+            bcf_info.segment_idx0[1] = nearest_idx
+            update_midmonth_data!(date, bcf_info)
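+            # (the recursive call re-enters this function with segment_idx == segment_idx0 ==
+            # nearest_idx, so case 1 or case 3 then performs the actual file read)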
+
+        # case 3: model date is within the first segment read from the file
+        elseif (date - all_dates[segment_idx0]).value >= 0
+            @warn "On $date updating file data reads: file dates = [ $(all_dates[segment_idx]) , $(all_dates[segment_idx+1]) ]"
+            bcf_info.segment_length .= (all_dates[segment_idx + 1] - all_dates[segment_idx]).value
+            bcf_info.monthly_fields[1] .= scaling_function(
+                Regridder.read_from_hdf5(bcfile_dir, hd_outfile_root, all_dates[segment_idx], varname, comms_ctx),
+                bcf_info,
+            )
+            bcf_info.monthly_fields[2] .= scaling_function(
+                Regridder.read_from_hdf5(bcfile_dir, hd_outfile_root, all_dates[segment_idx + 1], varname, comms_ctx),
+                bcf_info,
+            )
+            bcf_info.segment_idx0[1] -= Int(1)
+        end
+
+    # case 4: model date is at or after the last date in the file
+    elseif (date - all_dates[end]).value >= 0
+        @warn "This time period is after BC data - using file from $(all_dates[end])"
         bcf_info.monthly_fields[1] .= scaling_function(
             Regridder.read_from_hdf5(
                 bcfile_dir,
@@ -211,34 +243,28 @@ function update_midmonth_data!(date, bcf_info::BCFileInfo{FT}) where {FT}
         bcf_info.monthly_fields[2] .= deepcopy(bcf_info.monthly_fields[1])
         bcf_info.segment_length .= Int(0)
 
-    # throw error when there are closer initial indices for the bc file data that matches this date0
-    elseif Dates.days(date - all_dates[Int(midmonth_idx + 1)]) > 2
-        nearest_idx = argmin(
-            abs.(
-                parse(FT, TimeManager.datetime_to_strdate(date)) .-
-                parse.(FT, TimeManager.datetime_to_strdate.(all_dates[:]))
-            ),
-        )
-        # TODO test this
-        bcf_info.segment_idx[1] = midmonth_idx = midmonth_idx0 = nearest_idx
-        @warn "init data does not correspond to start date. Initializing with `SIC_info.segment_idx[1] = midmonth_idx = midmonth_idx0 = $nearest_idx` for this start date"
-
-    # date crosses to the next month
-    elseif Dates.days(date - all_dates[Int(midmonth_idx)]) > 0
-        midmonth_idx = bcf_info.segment_idx[1] += Int(1)
-        @warn "On $date updating monthly data files: mid-month dates = [ $(all_dates[Int(midmonth_idx)]) , $(all_dates[Int(midmonth_idx+1)]) ]"
-        bcf_info.segment_length .= (all_dates[Int(midmonth_idx + 1)] - all_dates[Int(midmonth_idx)]).value
+    # case 5: model date crosses to the next segment
+    elseif (date - all_dates[Int(segment_idx) + 1]).value >= 0
+        segment_idx = bcf_info.segment_idx[1] += Int(1)
+
+        bcf_info.segment_length .= (all_dates[Int(segment_idx + 1)] - all_dates[Int(segment_idx)]).value
+
         bcf_info.monthly_fields[1] .= scaling_function(
-            Regridder.read_from_hdf5(bcfile_dir, hd_outfile_root, all_dates[Int(midmonth_idx)], varname, comms_ctx),
+            Regridder.read_from_hdf5(bcfile_dir, hd_outfile_root, all_dates[Int(segment_idx)], varname, comms_ctx),
             bcf_info,
         )
         bcf_info.monthly_fields[2] .= scaling_function(
-            Regridder.read_from_hdf5(bcfile_dir, hd_outfile_root, all_dates[Int(midmonth_idx + 1)], varname, comms_ctx),
+            Regridder.read_from_hdf5(bcfile_dir, hd_outfile_root, all_dates[Int(segment_idx + 1)], varname, comms_ctx),
             bcf_info,
         )
 
+    # case 6: undefined condition
     else
-        throw(ErrorException("Check boundary file specification"))
+        throw(
+            ErrorException(
+                "Check boundary file specification: segment: $(all_dates[segment_idx]) - $(all_dates[segment_idx+1]), date: $date",
+            ),
+        )
     end
 end
diff --git a/test/bcreader_tests.jl b/test/bcreader_tests.jl
index f5ac7dee73..0540f1d073 100644
--- a/test/bcreader_tests.jl
+++ b/test/bcreader_tests.jl
@@ -227,60 +227,113 @@
             bcf_info.monthly_fields[1] .= current_fields[1]
             bcf_info.monthly_fields[2] .= current_fields[2]
             bcf_info.segment_length[1] = Int(1)
+            bcf_info.segment_idx[1] = bcf_info.segment_idx0[1]
         end
 
         hd_outfile_root = varname * "_cgll"
 
-        # case 1: segment_idx == segment_idx0, date < all_dates[segment_idx]
+        # case 1: date < all_dates[segment_idx] (init)
         bcf_info.segment_idx[1] = bcf_info.segment_idx0[1]
         date = DateTime(bcf_info.all_dates[bcf_info.segment_idx[1]] - Dates.Day(1))
         BCReader.update_midmonth_data!(date, bcf_info)
 
-        @test bcf_info.monthly_fields[1] == bcf_info.scaling_function(
-            Regridder.read_from_hdf5(
-                regrid_dir,
-                hd_outfile_root,
-                bcf_info.all_dates[Int(bcf_info.segment_idx0[1])],
-                varname,
-                comms_ctx,
-            ),
-            bcf_info,
-        )
+        # both monthly fields hold the same boundary data
         @test bcf_info.monthly_fields[2] == bcf_info.monthly_fields[1]
+        # zero segment length
         @test bcf_info.segment_length[1] == Int(0)
+        # reference index is reset to segment_idx - 1
+        @test bcf_info.segment_idx0[1] == bcf_info.segment_idx[1] - 1
+
+        # cases 2 and 3
+        extra_days = [Dates.Day(0), Dates.Day(3)]
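+        # (testing both an exact match of a file date and a model date a few days past it)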
+        for extra in extra_days
+            # case 3: (date - all_dates[Int(segment_idx0)]) >= 0 (init)
+            reset_bcf_info(bcf_info)
+            date = DateTime(bcf_info.all_dates[bcf_info.segment_idx0[1]]) + extra
+            BCReader.update_midmonth_data!(date, bcf_info)
+
+            end_field_c2 = deepcopy(bcf_info.monthly_fields[2])
+            segment_length_c2 = deepcopy(bcf_info.segment_length[1])
+            current_index_c2 = deepcopy(bcf_info.segment_idx[1])
+
+            # modified field
+            @test end_field_c2 !== bcf_info.monthly_fields[1]
+            # updated segment length
+            @test segment_length_c2[1] !== Int(0)
+            # updated reference segment index
+            @test current_index_c2 == bcf_info.segment_idx0[1] + 1
+
+            # case 2: (date - all_dates[Int(segment_idx0) + 1]) >= 0 (init)
+            # do not reset segment_idx0; its current value ensures that we get the same result as case 3
+            reset_bcf_info(bcf_info)
+
+            date = DateTime(bcf_info.all_dates[bcf_info.segment_idx0[1] + 1]) + extra
+            BCReader.update_midmonth_data!(date, bcf_info)
+
+            nearest_idx = argmin(
+                abs.(
+                    parse(FT, TimeManager.datetime_to_strdate(date)) .-
+                    parse.(FT, TimeManager.datetime_to_strdate.(bcf_info.all_dates[:]))
+                ),
+            )
+
+            @test bcf_info.segment_idx[1] == bcf_info.segment_idx0[1] + 1 == nearest_idx
+
+            # compare to case 3 (case 2 defaults to it, so expect the same result):
+            @test bcf_info.monthly_fields[1] == bcf_info.scaling_function(
+                Regridder.read_from_hdf5(
+                    regrid_dir,
+                    hd_outfile_root,
+                    bcf_info.all_dates[Int(bcf_info.segment_idx[1])],
+                    varname,
+                    comms_ctx,
+                ),
+                bcf_info,
+            )
+
+            # check case 2 defaults to case 3
+            @test end_field_c2 !== bcf_info.monthly_fields[1]
+            @test segment_length_c2[1] !== Int(0)
+            @test current_index_c2 == bcf_info.segment_idx0[1] + 1
 
-        # case 2: date > all_dates[end - 1]
-        reset_bcf_info(bcf_info)
-        date = DateTime(bcf_info.all_dates[end - 1] + Dates.Day(1))
-        BCReader.update_midmonth_data!(date, bcf_info)
+        end
 
-        @test bcf_info.monthly_fields[1] == bcf_info.scaling_function(
-            Regridder.read_from_hdf5(
-                regrid_dir,
-                hd_outfile_root,
-                bcf_info.all_dates[Int(length(bcf_info.all_dates))],
-                varname,
-                comms_ctx,
-            ),
-            bcf_info,
-        )
-        @test bcf_info.monthly_fields[2] == bcf_info.monthly_fields[1]
-        @test bcf_info.segment_length[1] == Int(0)
+        # case 4: date >= all_dates[end]
+        for extra in extra_days
+            bcf_info.segment_idx0[1] = length(bcf_info.all_dates)
+            reset_bcf_info(bcf_info)
+
+            date = DateTime(bcf_info.all_dates[bcf_info.segment_idx0[1]]) + extra
+            BCReader.update_midmonth_data!(date, bcf_info)
+
+            @test bcf_info.monthly_fields[1] == bcf_info.scaling_function(
+                Regridder.read_from_hdf5(
+                    regrid_dir,
+                    hd_outfile_root,
+                    bcf_info.all_dates[Int(length(bcf_info.all_dates))],
+                    varname,
+                    comms_ctx,
+                ),
+                bcf_info,
+            )
+            @test bcf_info.monthly_fields[2] == bcf_info.monthly_fields[1]
+            @test bcf_info.segment_length[1] == Int(0)
+        end
 
-        # case 3: date - all_dates[segment_idx + 1] > 2
-        reset_bcf_info(bcf_info)
-        date = DateTime(bcf_info.all_dates[bcf_info.segment_idx[1] + 1] + Dates.Day(3))
-        BCReader.update_midmonth_data!(date, bcf_info)
+        # case 5: date crosses to the next segment ((date - all_dates[segment_idx + 1]) >= 0)
 
-        nearest_idx = argmin(
-            abs.(
-                parse(FT, TimeManager.datetime_to_strdate(date)) .-
-                parse.(FT, TimeManager.datetime_to_strdate.(bcf_info.all_dates[:]))
-            ),
-        )
-        @test bcf_info.segment_idx[1] == bcf_info.segment_idx0[1] == nearest_idx
+        for extra in extra_days
+            bcf_info.segment_idx0[1] = 2
+            reset_bcf_info(bcf_info)
+
+            date = DateTime(bcf_info.all_dates[bcf_info.segment_idx0[1]] + extra)
+            BCReader.update_midmonth_data!(date, bcf_info)
+
+            @test bcf_info.segment_idx[1] == bcf_info.segment_idx0[1] + 1
+        end
 
-        # case 4: everything else
+        # case 6: everything else
         reset_bcf_info(bcf_info)
         bcf_info.segment_idx[1] = bcf_info.segment_idx0[1] + Int(1)
         date = bcf_info.all_dates[bcf_info.segment_idx[1]] - Dates.Day(1)
diff --git a/test/mpi_tests/bcreader_mpi_tests.jl b/test/mpi_tests/bcreader_mpi_tests.jl
index 5adb7e16de..6749ba67c0 100644
--- a/test/mpi_tests/bcreader_mpi_tests.jl
+++ b/test/mpi_tests/bcreader_mpi_tests.jl
@@ -4,7 +4,7 @@
 These are in a separate testing file from the other BCReader unit tests so that
 MPI can be enabled for testing of these functions.
 =#
-
+using ClimaCoupler
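+# ClimaCoupler itself is imported so that `pkgdir(ClimaCoupler)` can be used below to
+# locate the package root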
 using ClimaCoupler: Regridder, BCReader, TimeManager, Interfacer
 using ClimaCore: Fields, Meshes, Domains, Topologies, Spaces
 using ClimaComms
@@ -14,7 +14,8 @@ using NCDatasets
 import ArtifactWrappers as AW
 
 # Get the path to the necessary data file - sst map
-include(joinpath(@__DIR__, "..", "..", "artifacts", "artifact_funcs.jl"))
+pkg_dir = pkgdir(ClimaCoupler)
+include(joinpath(pkg_dir, "artifacts", "artifact_funcs.jl"))
 const sst_data = joinpath(sst_dataset_path(), "sst.nc")
 
 # set up MPI communications context
@@ -176,65 +177,118 @@ end
             bcf_info.monthly_fields[1] .= current_fields[1]
             bcf_info.monthly_fields[2] .= current_fields[2]
             bcf_info.segment_length[1] = Int(1)
+            bcf_info.segment_idx[1] = bcf_info.segment_idx0[1]
         end
 
         hd_outfile_root = varname * "_cgll"
 
-        # case 1: segment_idx == segment_idx0, date < all_dates[segment_idx]
+        # case 1: date < all_dates[segment_idx] (init)
         bcf_info.segment_idx[1] = bcf_info.segment_idx0[1]
         date = DateTime(bcf_info.all_dates[bcf_info.segment_idx[1]] - Dates.Day(1))
         BCReader.update_midmonth_data!(date, bcf_info)
 
         ClimaComms.barrier(comms_ctx)
-        @test bcf_info.monthly_fields[1] == bcf_info.scaling_function(
-            Regridder.read_from_hdf5(
-                regrid_dir,
-                hd_outfile_root,
-                bcf_info.all_dates[Int(bcf_info.segment_idx0[1])],
-                varname,
-                comms_ctx,
-            ),
-            bcf_info,
-        )
+        # both monthly fields hold the same boundary data
         @test bcf_info.monthly_fields[2] == bcf_info.monthly_fields[1]
+        # zero segment length
         @test bcf_info.segment_length[1] == Int(0)
+        # reference index is reset to segment_idx - 1
+        @test bcf_info.segment_idx0[1] == bcf_info.segment_idx[1] - 1
+
+        # cases 2 and 3
+        extra_days = [Dates.Day(0), Dates.Day(3)]
+        for extra in extra_days
+            # case 3: (date - all_dates[Int(segment_idx0)]) >= 0 (init)
+            reset_bcf_info(bcf_info)
+            date = DateTime(bcf_info.all_dates[bcf_info.segment_idx0[1]]) + extra
+            BCReader.update_midmonth_data!(date, bcf_info)
+
+            end_field_c2 = deepcopy(bcf_info.monthly_fields[2])
+            segment_length_c2 = deepcopy(bcf_info.segment_length[1])
+            current_index_c2 = deepcopy(bcf_info.segment_idx[1])
+
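+            # synchronize all ranks before checking the updated state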
+            ClimaComms.barrier(comms_ctx)
+            # modified field
+            @test end_field_c2 !== bcf_info.monthly_fields[1]
+            # updated segment length
+            @test segment_length_c2[1] !== Int(0)
+            # updated reference segment index
+            @test current_index_c2 == bcf_info.segment_idx0[1] + 1
+
+            # case 2: (date - all_dates[Int(segment_idx0) + 1]) >= 0 (init)
+            # do not reset segment_idx0; its current value ensures that we get the same result as case 3
+            reset_bcf_info(bcf_info)
+
+            date = DateTime(bcf_info.all_dates[bcf_info.segment_idx0[1] + 1]) + extra
+            BCReader.update_midmonth_data!(date, bcf_info)
+
+            nearest_idx = argmin(
+                abs.(
+                    parse(FT, TimeManager.datetime_to_strdate(date)) .-
+                    parse.(FT, TimeManager.datetime_to_strdate.(bcf_info.all_dates[:]))
+                ),
+            )
+
+            ClimaComms.barrier(comms_ctx)
+            @test bcf_info.segment_idx[1] == bcf_info.segment_idx0[1] + 1 == nearest_idx
+
+            # compare to case 3 (case 2 defaults to it, so expect the same result):
+            @test bcf_info.monthly_fields[1] == bcf_info.scaling_function(
+                Regridder.read_from_hdf5(
+                    regrid_dir,
+                    hd_outfile_root,
+                    bcf_info.all_dates[Int(bcf_info.segment_idx[1])],
+                    varname,
+                    comms_ctx,
+                ),
+                bcf_info,
+            )
+
+            # check case 2 defaults to case 3
+            @test end_field_c2 !== bcf_info.monthly_fields[1]
+            @test segment_length_c2[1] !== Int(0)
+            @test current_index_c2 == bcf_info.segment_idx0[1] + 1
 
-        # case 2: date > all_dates[end - 1]
-        reset_bcf_info(bcf_info)
-        date = DateTime(bcf_info.all_dates[end - 1] + Dates.Day(1))
-        BCReader.update_midmonth_data!(date, bcf_info)
+        end
 
-        ClimaComms.barrier(comms_ctx)
+        # case 4: date >= all_dates[end]
+        for extra in extra_days
+            bcf_info.segment_idx0[1] = length(bcf_info.all_dates)
+            reset_bcf_info(bcf_info)
+
+            date = DateTime(bcf_info.all_dates[bcf_info.segment_idx0[1]]) + extra
+            BCReader.update_midmonth_data!(date, bcf_info)
+
+            ClimaComms.barrier(comms_ctx)
+            @test bcf_info.monthly_fields[1] == bcf_info.scaling_function(
+                Regridder.read_from_hdf5(
+                    regrid_dir,
+                    hd_outfile_root,
+                    bcf_info.all_dates[Int(length(bcf_info.all_dates))],
+                    varname,
+                    comms_ctx,
+                ),
+                bcf_info,
+            )
+            @test bcf_info.monthly_fields[2] == bcf_info.monthly_fields[1]
+            @test bcf_info.segment_length[1] == Int(0)
+        end
 
-        @test bcf_info.monthly_fields[1] == bcf_info.scaling_function(
-            Regridder.read_from_hdf5(
-                regrid_dir,
-                hd_outfile_root,
-                bcf_info.all_dates[Int(length(bcf_info.all_dates))],
-                varname,
-                comms_ctx,
-            ),
-            bcf_info,
-        )
-        @test bcf_info.monthly_fields[2] == bcf_info.monthly_fields[1]
-        @test bcf_info.segment_length[1] == Int(0)
+        # case 5: date crosses to the next segment ((date - all_dates[segment_idx + 1]) >= 0)
 
-        # case 3: date - all_dates[segment_idx + 1] > 2
-        reset_bcf_info(bcf_info)
-        date = DateTime(bcf_info.all_dates[bcf_info.segment_idx[1] + 1] + Dates.Day(3))
-        BCReader.update_midmonth_data!(date, bcf_info)
+        for extra in extra_days
+            bcf_info.segment_idx0[1] = 2
+            reset_bcf_info(bcf_info)
 
-        nearest_idx = argmin(
-            abs.(
-                parse(FT, TimeManager.datetime_to_strdate(date)) .-
-                parse.(FT, TimeManager.datetime_to_strdate.(bcf_info.all_dates[:]))
-            ),
-        )
+            date = DateTime(bcf_info.all_dates[bcf_info.segment_idx0[1]] + extra)
+            BCReader.update_midmonth_data!(date, bcf_info)
 
-        ClimaComms.barrier(comms_ctx)
-        @test bcf_info.segment_idx[1] == bcf_info.segment_idx0[1] == nearest_idx
+            ClimaComms.barrier(comms_ctx)
+            @test bcf_info.segment_idx[1] == bcf_info.segment_idx0[1] + 1
+        end
 
-        # case 4: everything else
+        # case 6: everything else
         reset_bcf_info(bcf_info)
         bcf_info.segment_idx[1] = bcf_info.segment_idx0[1] + Int(1)
         date = bcf_info.all_dates[bcf_info.segment_idx[1]] - Dates.Day(1)