AQ_subshelf_melt.org

Table of contents

Introduction

Data example

Printout

import xarray as xr
xr.open_dataset('./dat/AQ_subshelf_melt.nc')
<xarray.Dataset> Size: 5kB
Dimensions:      (time: 31, region: 18)
Coordinates:
  * time         (time) datetime64[ns] 248B 1991-07-01 1992-07-01 ... 2021-07-01
  * region       (region) int32 72B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
Data variables:
    melt         (region, time) float64 4kB ...
    region_name  (region) <U5 360B ...
Attributes:
    description:   Antarctic region sub-shelf melt
    date_created:  20241101T045557Z
    title:         Subshelf melt per region
    history:       Processed for Schmidt (YYYY; in prep); by Ken Mankoff
    source:        doi:10.5067/SE3XH9RXQWAM and doi:10.5281/ZENODO.8052519
    Conventions:   CF-1.8
    DOI:           https://doi.org/10.5281/zenodo.14020895
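
To pull one region's time series by name rather than number, the region dimension can be re-indexed on region_name (a minimal sketch, assuming the file layout shown above; 'G-H' is one of the stored names):

import xarray as xr

ds = xr.open_dataset('./dat/AQ_subshelf_melt.nc')
ds = ds.set_index(region='region_name')  # index regions by name instead of number
print(ds['melt'].sel(region='G-H').to_series().head())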

Graphic

import xarray as xr
ds = xr.open_dataset('./dat/AQ_subshelf_melt.nc')
df = ds['melt'].sum(dim='region').to_dataframe()  # Antarctic-wide total
ax = df['melt'].plot(drawstyle='steps-post')
_ = ax.set_ylabel('Subshelf melt [Gt yr$^{-1}$]')

./fig/AQ_subshelf_melt.png

Subshelf melt by region

import xarray as xr
import matplotlib.pyplot as plt

ds = xr.open_dataset('dat/AQ_subshelf_melt.nc')
ds = ds.assign_coords(region=[f"{name} [{num}]" for name, num in
                              zip(ds['region_name'].values, ds['region'].values)])
df = ds['melt'].to_dataframe()
df = df['melt'].unstack().T
df['AQ'] = df.sum(axis='columns')       # Antarctic-wide total per year
df.loc['Mean'] = df.mean(axis='index')  # per-region mean over all years
df.index = [_[0:10] for _ in df.index.astype(str)]
df.round()
[Table: annual sub-shelf melt (Gt yr-1) per IMBIE region, A-Ap [1] through K-A [18], plus the Antarctic-wide total (AQ), for 1991-07-01 through 2021-07-01, with a final row of per-region means]

Processing

  • Antarctic ice shelf melt is from NSIDC 0792 (Paolo, 2024) and Davison (2023); the two products are averaged where they overlap, as sketched below
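
A minimal sketch of the skipna mean used in the Merge Paolo & Davison section below, with toy numbers standing in for the two products:

import numpy as np
import xarray as xr

t = np.arange(5)
# toy series; NaN where a product has no coverage
a = xr.DataArray([1.0, 2.0, 3.0, np.nan, np.nan], dims='time', coords={'time': t})
b = xr.DataArray([np.nan, 2.0, 5.0, 4.0, 6.0], dims='time', coords={'time': t})

# mean over sources where both exist, otherwise the one available value
merged = xr.concat([a, b], dim='src').mean(dim='src', skipna=True)
print(merged.values)  # [1. 2. 4. 4. 6.]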

NSIDC 0792 (1992 – 2017)

Unit check:

import xarray as xr
root = "~/data/NSIDC/NSIDC-0792.001/1992.03.17"
ds = xr.open_dataset(root + '/NSIDC-0792_19920317-20171216_V01.0.nc', chunks='auto')
ds = ds['melt']

# convert from m/yr ice on 1920 m x 1920 m grid cells to Gt/yr water (ice density 0.917 t m-3)
ds = ds * 1920 * 1920 * 0.917 / 1E9
ds = ds.sum(dim=['x','y'])
ds = ds.resample({'time':'YS-JUL'}).mean()  # annual means over July-start years
df = ds.to_dataframe()
df
time                 melt
1991-07-01 00:00:00  -1199.32
1992-07-01 00:00:00  -1605.41
1993-07-01 00:00:00  -836.263
1994-07-01 00:00:00  -1052.61
1995-07-01 00:00:00  -613.902
1996-07-01 00:00:00  -732.728
1997-07-01 00:00:00  -717.547
1998-07-01 00:00:00  -1108.82
1999-07-01 00:00:00  -1213.66
2000-07-01 00:00:00  -330.797
2001-07-01 00:00:00  -845.425
2002-07-01 00:00:00  -1153.3
2003-07-01 00:00:00  -1778.67
2004-07-01 00:00:00  -458.056
2005-07-01 00:00:00  -1230.67
2006-07-01 00:00:00  -613.852
2007-07-01 00:00:00  -1306.97
2008-07-01 00:00:00  -1055.59
2009-07-01 00:00:00  -738.313
2010-07-01 00:00:00  -641.088
2011-07-01 00:00:00  -916.554
2012-07-01 00:00:00  -863.636
2013-07-01 00:00:00  -1512.85
2014-07-01 00:00:00  -390.767
2015-07-01 00:00:00  -311.896
2016-07-01 00:00:00  -918.112
2017-07-01 00:00:00  -742.674
import numpy as np
import pandas as pd
import geopandas as gpd
import flox # faster groupby
import flox.xarray
import xarray as xr
from shapely.geometry import Point

root = "~/data/NSIDC/NSIDC-0792.001/1992.03.17"
ds = xr.open_dataset(root + '/NSIDC-0792_19920317-20171216_V01.0.nc', chunks='auto')
ds = ds[['melt','melt_mean','melt_err','ID']]

# ds['melt'] = ds['melt'] # + ds['melt_mean']
# ds = ds.drop_vars(['melt_mean'])
#print("annual averages...")
#ds = ds.resample({'time':'YS'}).sum()

# shelf name with longitude and latitude
df = pd.read_excel("~/data/Davison_2023/adi0186_table_s2.xlsx",
                   sheet_name = 'Total mass changes',
                   usecols = (1,2,3), index_col = 0, skiprows = 4)
df = df.dropna()
shelf = gpd.GeoDataFrame(
    geometry=gpd.points_from_xy(df.longitude, df.latitude, crs="EPSG:4326"),
    data=df)
shelf = shelf.to_crs('EPSG:3031')
# region name
region = gpd.read_file("~/data/IMBIE/Rignot/ANT_Basins_IMBIE2_v1.6.shp")
region = region[region['Regions'] != 'Islands']
# find regions nearest each shelf
shelf_region = gpd.sjoin_nearest(shelf,region)\
                  .drop(columns=['index_right','latitude','longitude','Regions'])


# Want groupby mean so need these as vars not just coords
ds['xx'] = (('x'), ds['x'].values)
ds['yy'] = (('y'), ds['y'].values)

ds['melt_err'] = ds['melt_err']**2  # square first so the reduce below gives a sum of squares
ds_xy = xr.merge([
    flox.xarray.xarray_reduce(ds[["xx","yy"]],
                              ds['ID'],
                              func="mean",
                              expected_groups=np.unique(ds['ID'].values)),
    flox.xarray.xarray_reduce(ds[["melt","melt_err"]],
                              ds['ID'],
                              func="sum",
                              expected_groups=np.unique(ds['ID'].values)),
])
ds_xy = ds_xy.rename_vars({'xx':'x', 'yy':'y'})
ds_xy['melt_err'] = ds_xy['melt_err']**0.5  # back to 1-sigma: root of the summed squares

# Convert the xarray dataset's coordinates to a GeoDataFrame
points = [Point(x,y) for x,y in
          zip(ds_xy['x'].values.flatten(),
              ds_xy['y'].values.flatten())]
gdf_ds_xy = gpd.GeoDataFrame(geometry=points, crs='EPSG:3031')

# find region nearest each NSIDC 0792 x,y coordinate
xy_region = gpd.sjoin_nearest(gdf_ds_xy, shelf_region)

ds_xy['region'] = (('ID'), xy_region['Subregion'].values)
ds = ds_xy.groupby('region').sum().drop_vars(['x','y'])

ds['time'] = [pd.to_datetime(_.astype(str)[0:10]) for _ in ds['time'].values]
ds = ds.resample({'time':'YS-JUL'}).mean()

# convert from m/yr ice on 1920 m x 1920 m grid cells to Gt/yr water per sector
# (sign flipped so that melt into the ocean is positive)
ds = -1 * ds * 1920 * 1920 * 0.917 / 1E9

delayed_obj = ds.to_netcdf('tmp/aq_paolo_2024.nc', compute=False)
from dask.diagnostics import ProgressBar
with ProgressBar():
    results = delayed_obj.compute()
[########################################] | 100% Completed | 34.59 s
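
As a quick consistency check (a sketch, reading back the file just written), summing the regional series should reproduce the Antarctic-wide unit-check series above, with the sign flipped:

import xarray as xr

ds = xr.open_dataset('tmp/aq_paolo_2024.nc')
# collapse regions back to one Antarctic-wide series; compare with the unit-check table
print(ds['melt'].sum(dim='region').to_dataframe())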

Davison 2023 (1997 – 2021)

import numpy as np
import pandas as pd
import geopandas as gpd
import xarray as xr

# shelf name with longitude and latitude
df = pd.read_excel("~/data/Davison_2023/adi0186_table_s2.xlsx",
                   sheet_name = 'Total mass changes',
                   usecols = (1,2,3), index_col = 0, skiprows = 4)
df = df.dropna()
shelf = gpd.GeoDataFrame(
    geometry=gpd.points_from_xy(df.longitude, df.latitude, crs="EPSG:4326"), data=df)
shelf = shelf.to_crs('EPSG:3031')

# region name
region = gpd.read_file("~/data/IMBIE/Rignot/ANT_Basins_IMBIE2_v1.6.shp")
region = region[region['Regions'] != 'Islands']

# find regions nearest each shelf
shelf_region = gpd.sjoin_nearest(shelf,region)
shelf_region = shelf_region.drop(columns=['index_right','latitude','longitude','Regions'])

# load melt time series per shelf
melt = pd.read_excel("~/data/Davison_2023/adi0186_table_s2.xlsx",
                     sheet_name = 'Melt', index_col = 1, skiprows = 3, header = (0,1))
melt = melt.T.dropna().drop(columns=['Antarctic Ice Shelves'])

obs = melt.xs('observed', level='Ice shelf')
obs.index.name = 'date'
obs.index = pd.to_datetime(obs.index.astype(int).astype(str)+'-07-01', format="%Y-%m-%d")

# unc = melt.drop('observed', level=1, axis=0).reset_index().set_index('level_0').drop(columns=['ice shelf'])
unc = melt.xs('uncertainty', level='Ice shelf')
unc.index.name = 'date'
unc.index = obs.index


# # add steady state to time series
# melt_dot = pd.read_excel("~/data/Davison_2023/adi0186_table_s2.xlsx", sheet_name = 'Steady-state', index_col = 0, skiprows = 5, usecols=(1,4,5))
# melt_dot.columns = [_.split('.')[0] for _ in melt_dot.columns]
# melt_dot = melt_dot.T
# obs_dot = melt_dot.loc['observed'].T
# unc_dot = melt_dot.loc['uncertainty'].T
# obs = obs + obs_dot
# unc = unc + unc_dot

da_obs = xr.DataArray(data = obs.values,
                      dims = ['date','shelf'],
                      coords = {'date':obs.index.values, 'shelf':obs.columns})

ds = xr.Dataset({'melt': da_obs})
ds['uncertainty'] = (('date','shelf'), unc)
ds = ds.where(ds['shelf'] != 'Antarctic Ice Shelves', drop=True)
ds['region'] = (('shelf'), shelf_region['Subregion'])

# ds = ds.groupby('region').sum() # Want to agg() with different functions per column...

# uncertainty is sqrt of sum of squares. Not sure how to do this in-place in Xarray.
ds['unc2'] = ds['uncertainty']**2
ds2 = xr.merge([
    ds[['melt','region']].groupby('region').sum(),
    ds[['unc2','region']].groupby('region').sum(),
])
ds2['uncertainty'] = ds2['unc2']**0.5
ds2 = ds2.drop_vars('unc2')
# uncertainty for all of AQ as (sum(u**2))**0.5 matches Davison 2023 sheet "Melt" row 168 "Antarctic Ice Shelves"

# need to calculate AQ-wide uncertainty at shelf resolution because step-aggregating is not commutative
ds2['uncertainty_AQ'] = np.sqrt(ds['unc2'].sum(dim='shelf'))

ds = ds2

!rm tmp/aq_davison_2023.nc
delayed_obj = ds.to_netcdf('tmp/aq_davison_2023.nc', compute=False)
from dask.diagnostics import ProgressBar
with ProgressBar():
    results = delayed_obj.compute()
[########################################] | 100% Completed | 102.91 ms
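
A matching spot check (sketch): the regional sums should reproduce the "Antarctic Ice Shelves" totals in the Davison (2023) "Melt" sheet, just as the aggregated uncertainty does:

import xarray as xr

ds = xr.open_dataset('tmp/aq_davison_2023.nc')
print(ds['melt'].sum(dim='region').to_dataframe().head())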

Uncertainty

Antarctic-wide mean uncertainty from Davison (2023) is ~20 %.

obs_aq = obs.sum(axis='columns')
unc_aq = (unc**2).sum(axis='columns')**0.5 # matches Davison 2023 sheet "Melt" row 168 "Antarctic Ice Shelves"

err_pct = unc_aq / obs_aq * 100
err_pct.describe()
count    25.000000
mean     21.621548
std      10.183245
min      10.978453
25%      12.683884
50%      17.040673
75%      30.864864
max      37.599188
dtype: float64

NOTDONE Adusumilli 2020

  • citet:adusumilli_2020_data
import xarray as xr
import h5py

root = "/home/kdm/data/Adusumilli_2020"
fname = root + '/' + 'bb0448974g_3_1.h5'
hdf5file = root + '/' + fname

# import netCDF4
# ncf = netCDF4.Dataset(fname, diskless=True, persist=False)
# nch = ncf.groups.get('hdf5-name')
# xds = xr.open_dataset(xr.backends.NetCDF4DataStore(nch))

h5 = h5py.File(fname,'r')
w_b = h5['w_b'][:,:]
w_b_interp = h5['w_b_interp'][:,:]
w_b_uncert = h5['w_b_uncert'][:,:]
x = h5['x'][:,0]
y = h5['y'][:,0]
h5.close()

ds = xr.Dataset({
    'w_b': xr.DataArray(data=w_b, dims=['y','x'], coords={'x':x,'y':y},
                        attrs = {'_FillValue': -999.9, 'units':'m/yr'}),
    'w_b_interp': xr.DataArray(data=w_b_interp, dims=['y','x'], coords={'x':x,'y':y},
                        attrs = {'_FillValue': -999.9, 'units':'m/yr'}),
    'w_b_uncert': xr.DataArray(data=w_b_uncert, dims=['y','x'], coords={'x':x,'y':y},
                        attrs = {'_FillValue': -999.9, 'units':'m/yr'})})

ds = ds.where(ds < 3)

delayed_obj = ds.to_netcdf('./tmp/aq_adusumilli_2020.nc', compute=False)
from dask.diagnostics import ProgressBar
with ProgressBar():
    results = delayed_obj.compute()

print(ds)
[########################################] | 100% Completed | 7.44 s
<xarray.Dataset> Size: 3GB
Dimensions:     (y: 10229, x: 10941)
Coordinates:
  * x           (x) float64 88kB -2.736e+06 -2.736e+06 ... 2.734e+06 2.734e+06
  * y           (y) float64 82kB -2.374e+06 -2.374e+06 ... 2.74e+06 2.74e+06
Data variables:
    w_b         (y, x) float64 895MB nan nan nan nan nan ... nan nan nan nan nan
    w_b_interp  (y, x) float64 895MB nan nan nan nan nan ... nan nan nan nan nan
    w_b_uncert  (y, x) float64 895MB nan nan nan nan nan ... nan nan nan nan nan
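
The regional aggregation for this product is not done yet; as a placeholder, a minimal sketch of an Antarctic-wide integral of w_b, mirroring the NSIDC 0792 unit check (assumptions: uniform grid spacing inferred from x, and missing cells stored as NaN, as the printout above suggests):

import h5py
import numpy as np

root = "/home/kdm/data/Adusumilli_2020"
with h5py.File(root + '/bb0448974g_3_1.h5', 'r') as h5:
    w_b = h5['w_b'][:, :]  # basal melt rate [m/yr ice]
    x = h5['x'][:, 0]
dx = float(abs(x[1] - x[0]))  # grid spacing [m], assumed uniform
# area-integrate and convert to Gt/yr water, as in the NSIDC 0792 unit check
print(np.nansum(w_b) * dx * dx * 0.917 / 1e9)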

Merge Paolo & Davison

import xarray as xr
import datetime
import numpy as np

p = xr.open_dataset('./tmp/aq_paolo_2024.nc')\
      .rename({'melt':'melt_paolo',
               'melt_err':'melt_err_paolo'})
d = xr.open_dataset('./tmp/aq_davison_2023.nc')\
      .rename({'date':'time',
               'melt':'melt_davison',
               'uncertainty':'melt_err_davison'})

m = xr.merge([p,d])

m['region_name'] = m['region']                    # keep the name labels
m['region'] = np.arange(18).astype(np.int32) + 1  # re-key regions as integers 1..18
# average the two products; skipna keeps whichever product exists outside the overlap
m['melt_mean'] = xr.concat([m['melt_paolo'],
                            m['melt_davison']],
                           dim='new_dim')\
                   .mean(dim='new_dim', skipna=True)

ds = xr.Dataset()
ds['time'] = m['time']
ds['region'] = m['region'].values

ds['melt'] = m['melt_mean'].T
ds['region_name'] = m['region_name']

ds.attrs['description'] = 'Antarctic region sub-shelf melt'
ds['melt'].attrs['units'] = 'Gt yr-1'
ds['melt'].attrs['long_name'] = 'Sub shelf melt'
ds['melt'].attrs['standard_name'] = 'water_flux_into_sea_water_from_land_ice'
ds['time'].attrs['standard_name'] = 'time'
ds['region'].attrs['long_name'] = 'IMBIE region'
ds.attrs['date_created'] = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
ds.attrs['title'] = 'Subshelf melt per region'
ds.attrs['history'] = 'Processed for Schmidt (YYYY; in prep); by Ken Mankoff'
ds.attrs['source'] = 'doi:10.5067/SE3XH9RXQWAM and doi:10.5281/ZENODO.8052519'
ds.attrs['Conventions'] = 'CF-1.8'
ds.attrs['DOI'] = 'https://doi.org/10.5281/zenodo.14020895'

comp = dict(zlib=True, complevel=5)
encoding = {var: comp for var in ['melt']}
encoding['time'] = {'dtype': 'i4'}

!rm ./dat/AQ_subshelf_melt.nc
ds.to_netcdf('./dat/AQ_subshelf_melt.nc', encoding=encoding)
!ncdump -h ./dat/AQ_subshelf_melt.nc
netcdf AQ_subshelf_melt {
dimensions:
	time = 31 ;
	region = 18 ;
variables:
	int time(time) ;
		time:standard_name = "time" ;
		time:units = "days since 1991-07-01 00:00:00" ;
		time:calendar = "proleptic_gregorian" ;
	int region(region) ;
		region:long_name = "IMBIE region" ;
	double melt(region, time) ;
		melt:_FillValue = NaN ;
		melt:units = "Gt yr-1" ;
		melt:long_name = "Sub shelf melt" ;
		melt:standard_name = "water_flux_into_sea_water_from_land_ice" ;
	string region_name(region) ;

// global attributes:
		:description = "Antarctic region sub-shelf melt" ;
		:date_created = "20241101T050531Z" ;
		:title = "Subshelf melt per region" ;
		:history = "Processed for Schmidt (YYYY; in prep); by Ken Mankoff" ;
		:source = "doi:10.5067/SE3XH9RXQWAM and doi:10.5281/ZENODO.8052519" ;
		:Conventions = "CF-1.8" ;
		:DOI = "https://doi.org/10.5281/zenodo.14020895" ;
}