Skip to content

Commit

Permalink
new single profile object, updated index files
Browse files Browse the repository at this point in the history
  • Loading branch information
cgrdn committed Mar 12, 2024
1 parent b896bda commit 6dbec6a
Show file tree
Hide file tree
Showing 10 changed files with 95 additions and 41 deletions.
1 change: 1 addition & 0 deletions bgcArgoDMQC/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
from .core import *
from .oxygen import *
from .sprof import sprof
from .prof import prof
73 changes: 65 additions & 8 deletions bgcArgoDMQC/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,7 @@ def load_argo(local_path, wmo, grid=False, verbose=True):
local_path = Path(local_path)
dac = io.get_dac(wmo)

if type(wmo) is not str:
wmo = str(wmo)
wmo = str(wmo) if type(wmo) is not str else wmo

# check that necessary files exist - can continue without BRtraj file but
# need Sprof and meta files
Expand Down Expand Up @@ -198,6 +197,62 @@ def load_argo(local_path, wmo, grid=False, verbose=True):

return floatData, Sprof, BRtraj, meta, fillvalue

def load_profile(local_path, wmo, cyc, kind='C'):
    '''
    Function to load in all data from a single profile file,
    core or BGC.

    Args:
        local_path: local path of float data
        wmo: float ID number (int or str)
        cyc: cycle number (int, or any value accepted by int())
        kind: core ("C") or B ("B") file; any value other than "C" is
            used verbatim as the file name prefix

    Returns:
        floatData: python dict() object built by read_flat_variables(),
            with SDN, CYCLES and WMO entries added and the *_QC fields
            (except PROFILE/HISTORY ones) passed through io.read_qc()
        profFile: Path() of the profile file that was loaded
        fillvalue: dict mapping each netCDF variable name to its _FillValue

    Raises:
        FileNotFoundError: if neither the D-mode nor the R-mode file exists

    Author:
        Christopher Gordon
        Fisheries and Oceans Canada
        chris.gordon@dfo-mpo.gc.ca
    '''

    # make local_path a Path() object from a string, account for windows path
    local_path = Path(local_path)
    dac = io.get_dac(wmo)

    wmo = str(wmo)
    # the file name is built with a zero-padded integer format (":03d"),
    # so the cycle number must be an int - a str would raise ValueError
    cyc = int(cyc)

    # core files carry no prefix letter, other kinds use it verbatim
    prefix = '' if kind == 'C' else kind

    # check that the file exists - check for D-mode file first
    profile_dir = local_path / dac / wmo / 'profiles'
    profFile = profile_dir / f'{prefix}D{wmo}_{cyc:03d}.nc'
    if not profFile.exists():
        profFile = profile_dir / f'{prefix}R{wmo}_{cyc:03d}.nc'

    if not profFile.exists():
        raise FileNotFoundError(f'No R- or D-mode file: {profFile.absolute()}')

    # context manager closes the file handle even if reading fails, so the
    # file is not left open when it is later rewritten
    # NOTE(review): assumes read_flat_variables() copies the data into
    # memory rather than keeping references to the open dataset - confirm
    with Dataset(profFile, 'r') as nc:
        # fillvalue dict
        fillvalue = {k: nc[k]._FillValue for k in nc.variables.keys()}

        floatData = read_flat_variables(nc)
        floatData['SDN'] = floatData['JULD'] + mdates.datestr2num('1950-01-01')
        floatData['CYCLES'] = floatData['CYCLE_NUMBER']
        floatData['WMO'] = wmo

        qc_keys = [
            s for s in floatData.keys()
            if '_QC' in s and 'PROFILE' not in s and 'HISTORY' not in s
        ]
        for qc in qc_keys:
            floatData[qc] = io.read_qc(floatData[qc])

    return floatData, profFile, fillvalue

def read_flat_variables(nc):
'''
Read all variables and dimensions from an Argo netCDF file.
Expand Down Expand Up @@ -266,7 +321,7 @@ def read_history_qctest(nc):
def dict_clean(float_data, bad_flags=None):

clean_float_data = copy.deepcopy(float_data)
qc_flags = [k for k in clean_float_data.keys() if '_QC' in k and 'PROFILE' not in k]
qc_flags = [s for s in clean_float_data.keys() if '_QC' in s and ('PROFILE' not in s and 'HISTORY' not in s)]

if bad_flags is None:
for qc_key in qc_flags:
Expand Down Expand Up @@ -299,14 +354,15 @@ def dict_clean(float_data, bad_flags=None):
def dict_fillvalue_clean(float_data):

clean_float_data = copy.deepcopy(float_data)
qc_keys = [k for k in clean_float_data.keys() if '_QC' in k and 'SDN' not in k and 'PROFILE' not in k]
qc_keys = [s for s in clean_float_data.keys() if '_QC' in s and ('PROFILE' not in s and 'HISTORY' not in s)]

for k in qc_keys:
data_key = k.replace('_QC','')
if data_key == 'POSITION':
for dk in ['LATITUDE', 'LONGITUDE', 'LATITUDE_GRID', 'LONGITUDE_GRID']:
fillvalue_index = clean_float_data[dk] >= 99999. # use greater than because date fillval is 999999
clean_float_data[dk][fillvalue_index] = np.nan
if dk in clean_float_data.keys():
fillvalue_index = clean_float_data[dk] >= 99999. # use greater than because date fillval is 999999
clean_float_data[dk][fillvalue_index] = np.nan
else:
fillvalue_index = clean_float_data[data_key] >= 99999. # use greater than because date fillval is 999999
clean_float_data[data_key][fillvalue_index] = np.nan
Expand All @@ -319,8 +375,9 @@ def dict_fillvalue_clean(float_data):
fillvalue_index = clean_float_data['SDN'] >= 999999.
clean_float_data['SDN'][fillvalue_index] = np.nan

fillvalue_index = clean_float_data['SDN_GRID'] >= 999999.
clean_float_data['SDN_GRID'][fillvalue_index] = np.nan
if 'SDN_GRID' in float_data.keys():
fillvalue_index = clean_float_data['SDN_GRID'] >= 999999.
clean_float_data['SDN_GRID'][fillvalue_index] = np.nan

return clean_float_data

Expand Down
52 changes: 22 additions & 30 deletions bgcArgoDMQC/core/bio_prof.py → bgcArgoDMQC/core/prof.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,40 +8,37 @@
from .. import plot
from .. import io

class sprof:
class prof:
'''
Class that loads Argo synthetic profile data for a given float ID number
(wmo).
Then, load the individual variables into fields in the class, for
example::
syn = sprof(wmo)
print(syn.DOXY)
p = prof(wmo, cyc)
print(p.DOXY)
Or load it into a pandas dataframe::
df = syn.to_dataframe()
THe main function serves to minimize the onus on the user to organize
variables for quality control. Calculating an oxygen gain becomes simple::
gains = syn.calc_gains(ref='NCEP')
df = p.to_dataframe()
'''

set_dirs = set_dirs

def __init__(self, wmo, cycles=None, keep_fillvalue=False, rcheck=True, verbose=False):
def __init__(self, wmo, cycle, kind='C', keep_fillvalue=False, rcheck=True, verbose=False):

self.__floatdict__ = {}

# self.__floatdict__, self.__Sprof__, self.__BRtraj__, self.__meta__, self.__fillvalue__ = load_argo(ARGO_PATH, wmo, grid=True, verbose=verbose)
self.__floatdict__, self.__prof__, self.__fillvalue__ = load_profile(io.Path.ARGO_PATH, wmo, cycle, kind=kind)
self.__rawfloatdict__ = self.__floatdict__

# local path info
self.argo_path = ARGO_PATH
self.woa_path = WOA_PATH
self.ncep_path = NCEP_PATH
self.argo_path = io.Path.ARGO_PATH
self.woa_path = io.Path.WOA_PATH
self.ncep_path = io.Path.NCEP_PATH

self.WMO = wmo
self.cycle = cycle
self.kind = kind

self.to_dataframe()

Expand All @@ -52,13 +49,13 @@ def __init__(self, wmo, cycles=None, keep_fillvalue=False, rcheck=True, verbose=
self.check_range('DOXY')

def __getitem__(self, index):
return self.df[index]
return pd.Series(self.__floatdict__[index])

def __setitem__(self, index, value):
self.df[index] = value

def __getattr__(self, index):
return self.df[index]
return pd.Series(self.__floatdict__[index])

def rm_fillvalue(self):
'''
Expand Down Expand Up @@ -106,7 +103,7 @@ def check_range(self, key, verbose=False):
self.__floatdict__ = self.__rangecheckdict__

# recalculate O2sat if its DOXY
if k == 'DOXY':
if k == 'DOXY' and self.kind == 'S':
optode_flag = get_optode_type(int(self.__rangecheckdict__['WMO'])) == 'AANDERAA_OPTODE_4330'
self.__rangecheckdict__['O2Sat'] = 100*self.__rangecheckdict__['DOXY']/unit.oxy_sol(self.__rangecheckdict__['PSAL'], self.__rangecheckdict__['TEMP'], self.__rangecheckdict__['PDEN'], a4330=optode_flag)

Expand All @@ -126,8 +123,8 @@ def to_dataframe(self):
'''

df = pd.DataFrame()
n_level = self.__floatdict__['N_LEVELS']
priority_vars = ['PRES', 'PRES_QC', 'TEMP', 'TEMP_QC', 'PSAL', 'PSAL_QC']
n_level = self.__floatdict__['N_LEVELS']
priority_vars = ['PRES', 'PRES_QC', 'TEMP', 'TEMP_QC', 'PSAL', 'PSAL_QC'] if self.kind in ['C', 'S'] else ['PRES']
bgc_vars = list(set(self.__floatdict__.keys()) & set(['DOXY', 'DOXY_QC', 'DOXY_ADJUSTED', 'DOXY_ADJUSTED_QC', 'CHLA', 'CHLA_QC', 'CHLA_ADJUSTED', 'CHLA_ADJUSTED_QC', 'BBP700', 'BBP700_QC', 'BBP700_ADJUSTED', 'BBP_ADJUSTED_QC']))
priority_vars = priority_vars + bgc_vars

Expand All @@ -139,26 +136,21 @@ def to_dataframe(self):
if dim == n_level:
df[k] = self.__floatdict__[k]

self.df = df
return copy.deepcopy(self.df)

def update_field(self, field, value, where=None):

where = slice(None) if where is None else where
self.__floatdict__[field][where] = value

if field in ['DOXY', 'TEMP', 'PSAL']:
optode_flag = get_optode_type(int(self.__floatdict__['WMO'])) == 'AANDERAA_OPTODE_4330'
self.__floatdict__['O2Sat'] = unit.oxy_saturation(self.__floatdict__['DOXY'], self.__floatdict__['PSAL'], self.__floatdict__['TEMP'], self.__floatdict__['PDEN'], a4330=optode_flag)
elif field == 'DOXY_QC':
self.__floatdict__['O2Sat_QC'] = copy.deepcopy(self.__floatdict__['DOXY_QC'])

self.assign(self.__floatdict__)
self.to_dataframe()

def set_fillvalue(self, field, where=None):

self.update_field(field, self.__fillvalue__[field], where)

def export_files(self, data_mode=None, glob=None):
def export_files(self):

io.export_files(self.__floatdict__, self.__files__, data_mode=data_mode)
io.update_nc(self.__floatdict__, self.__prof__)
2 changes: 1 addition & 1 deletion bgcArgoDMQC/core/sprof.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ def export_files(self, data_mode='D', glob=None, **kwargs):
glob = 'BR*.nc' if glob is None else glob
files = (self.__Sprof__.parent / 'profiles').glob(glob)

io.export_files(self.__floatdict__, files, self.gain, data_mode=data_mode, **kwargs)
io.export_delayed_files(self.__floatdict__, files, self.gain, data_mode=data_mode, **kwargs)
self.__floatdict__ = current_float_dict

def add_independent_data(self, date=None, lat=None, lon=None, data_dict=None, label=None, **kwargs):
Expand Down
8 changes: 6 additions & 2 deletions bgcArgoDMQC/io/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def profile_qc(flags):

return grade

def export_files(fdict, files, gain, data_mode='D', comment=None, equation=None, coeff=None):
def export_delayed_files(fdict, files, gain, data_mode='D', comment=None, equation=None, coeff=None):

config = read_config()
dmqc_date = pd.Timestamp.now(tz='utc').strftime('%Y%m%d%H%M%S')
Expand Down Expand Up @@ -353,4 +353,8 @@ def export_files(fdict, files, gain, data_mode='D', comment=None, equation=None,

update_history(D_nc, history_dict)
sys.stdout.write('done\n')
D_nc.close()
D_nc.close()

def update_nc(fdict, file, history_dict=None):
    '''
    Placeholder for updating a netCDF file from a float data dict.

    Not implemented yet: the function does no work and returns None.

    Args:
        fdict: float data dict (called with a prof object's __floatdict__)
        file: target file (called with the loaded profile file path)
        history_dict: optional dict, defaults to an empty dict
            NOTE(review): presumably history entries like those handed to
            update_history() - confirm once implemented

    Returns:
        None
    '''

    # use a None sentinel instead of a mutable default so that a dict
    # shared across calls can never accumulate state
    history_dict = {} if history_dict is None else history_dict

    return
Binary file modified bgcArgoDMQC/resource/data/Index/ar_index_global_meta.txt.gz
Binary file not shown.
Binary file modified bgcArgoDMQC/resource/data/Index/ar_index_global_prof.txt.gz
Binary file not shown.
Binary file modified bgcArgoDMQC/resource/data/Index/ar_index_global_traj.txt.gz
Binary file not shown.
Binary file modified bgcArgoDMQC/resource/data/Index/argo_bio-profile_index.txt.gz
Binary file not shown.
Binary file not shown.

0 comments on commit 6dbec6a

Please sign in to comment.