diff --git a/docs/CTDFjorder/CTDFjorder.html b/docs/CTDFjorder/CTDFjorder.html index c214869..b661250 100644 --- a/docs/CTDFjorder/CTDFjorder.html +++ b/docs/CTDFjorder/CTDFjorder.html @@ -16,9 +16,16 @@ - + + + + + CTDFjorder + + API Documentation @@ -199,7 +206,7 @@ API Documentation -CTDFjorder.CTDFjorder +CTDFjorder.CTDFjorder @@ -267,1354 +274,1360 @@ 59 _filename = None 60 _calculator = None 61 _cwd = None - 62 master_sheet_path = "FjordPhyto MASTER SHEET.xlsx" - 63 _NO_SAMPLES_ERROR = "No samples in file." - 64 _NO_LOCATION_ERROR = "No location could be found." - 65 _DENSITY_CALCULATION_ERROR = "Could not calculate density on this dataset." - 66 _SALINITYABS_CALCULATION_ERROR = "Could not calculate density on this dataset." - 67 _DATA_CLEANING_ERROR = "No data remains after data cleaning, reverting to previous CTD" - 68 _REMOVE_NEGATIVES_ERROR = "No data remains after removing non-positive samples." - 69 _MLD_ERROR = "No data remains after calculating MLD." - 70 - 71 def __init__(self, rskfilepath): - 72 """ - 73 Initialize a new CTD object. - 74 - 75 Parameters - 76 ---------- - 77 rskfilepath : str - 78 The file path to the RSK file. - 79 """ - 80 self._rsk = RSK(rskfilepath) - 81 self._filename = ('_'.join(rskfilepath.split("/")[-1].split("_")[0:3]).split(".rsk")[0]) - 82 print("New CTDFjorder Object Created from : " + self._filename) - 83 self._ctd_array = np.array(self._rsk.npsamples()) - 84 self._ctd_array = pd.DataFrame(self._ctd_array) - 85 self.Utility = self.Utility(self._filename) - 86 self._cwd = _get_cwd() - 87 - 88 def view_table(self): - 89 """ - 90 Print the CTD data table. - 91 """ - 92 print(tabulate(self._ctd_array, headers='keys', tablefmt='psql')) - 93 - 94 def get_pandas_df(self , copy = True): - 95 """ - 96 Exposes the dataframe of the CTD object for custom processes. - 97 - 98 Parameters - 99 ---------- - 100 copy : bool, optional - 101 If True returns a copy, if False returns the actual DataFrame internal to the CTD object. Defaults to True. - 102 - 103 Returns - 104 ------- - 105 DataFrame - 106 The pandas df of the CTD object. - 107 """ - 108 return self._ctd_array.copy() if copy is True else self._ctd_array - 109 - 110 def add_filename_to_table(self): - 111 """ - 112 Add the filename to the CTD data table. - 113 """ - 114 self._ctd_array.assign(filename=self._filename) - 115 - 116 def remove_timezone_indicator(self): - 117 """ - 118 Removes the timezone indicator in the CTD data table 'timestamp' column. - 119 """ - 120 self._ctd_array = self.Utility.remove_sample_timezone_indicator(self._ctd_array) - 121 - 122 def add_location_to_table(self): - 123 """ - 124 Retrieves the sample location data from the RSK file and adds it to the CTD data table. - 125 If no location data is found, it attempts to estimate the location using the master sheet. - 126 """ - 127 location_data = self.Utility.get_sample_location(self._rsk, self._filename) - 128 if self.Utility.no_values_in_object(self._ctd_array): - 129 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) - 130 try: - 131 self._ctd_array = self._ctd_array.assign(latitude=location_data[0], - 132 longitude=location_data[1]) - 133 except Exception: - 134 self._ctd_array.loc['latitude'] = None - 135 self._ctd_array.loc['longitude'] = None - 136 self._ctd_array.loc['filename'] = None - 137 raise CTDError(self._filename, self._NO_LOCATION_ERROR) - 138 def remove_upcasts(self): - 139 """ - 140 Finds the global maximum depth of the sample, and filters out timestamps that occur before it. 
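# A minimal, standalone sketch of the upcast-removal idea described above. It
# mirrors the filter in remove_upcasts: keep only rows from the time of the
# deepest sample onward. Column names ('depth_00', 'timestamp') follow the RSK
# naming used in this class; the sample values are made up for illustration.
import pandas as pd

def keep_from_max_depth(df: pd.DataFrame) -> pd.DataFrame:
    """Return only the rows recorded at or after the deepest sample."""
    max_depth_time = df.loc[df['depth_00'].idxmax(), 'timestamp']
    return df[df['timestamp'] >= max_depth_time].reset_index(drop=True)

profile = pd.DataFrame({
    'timestamp': pd.to_datetime(['2023-01-01 10:00', '2023-01-01 10:01',
                                 '2023-01-01 10:02', '2023-01-01 10:03']),
    'depth_00': [5.0, 20.0, 40.0, 15.0],  # 40 m is the turnaround point
})
print(keep_from_max_depth(profile))  # keeps the 40 m row and everything after it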
- 141 """ - 142 max_depth_index = self._ctd_array['depth_00'].idxmax() - 143 max_depth_timestamp = self._ctd_array.loc[max_depth_index, 'timestamp'] - 144 self._ctd_array = self._ctd_array[self._ctd_array['timestamp'] >=max_depth_timestamp] - 145 - 146 def remove_non_positive_samples(self): - 147 """ - 148 Iterates through the columns of the CTD data table and removes rows with non-positive values - 149 for depth, pressure, salinity, absolute salinity, or density. - 150 """ - 151 if self.Utility.no_values_in_object(self._ctd_array): - 152 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) - 153 for column in self._ctd_array.columns: - 154 match column: - 155 case 'depth_00': - 156 self._ctd_array = self.Utility.remove_rows_with_negative_depth(self._ctd_array) - 157 case 'pressure_00': - 158 self._ctd_array = self.Utility.remove_rows_with_negative_pressure(self._ctd_array) - 159 case 'salinity_00': - 160 self._ctd_array = self.Utility.remove_rows_with_negative_salinity(self._ctd_array) - 161 case 'salinityabs': - 162 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) - 163 case 'density': - 164 self._ctd_array = self.Utility.remove_rows_with_negative_density(self._ctd_array) - 165 if self.Utility.no_values_in_object(self._ctd_array): - 166 raise CTDError(self._filename, self._REMOVE_NEGATIVES_ERROR) - 167 - 168 def clean(self, feature, method='salinitydiff'): - 169 """ - 170 Applies complex data cleaning methods to the specified feature based on the selected method. - 171 Currently supports cleaning practical salinity using the 'salinitydiff' method. - 172 - 173 Parameters - 174 ---------- - 175 feature : str - 176 The feature to clean (e.g., 'practicalsalinity'). - 177 method : str, optional - 178 The cleaning method to apply, defaults to 'salinitydiff'. - 179 Options are 'salinitydiff'. - 180 """ - 181 if self.Utility.no_values_in_object(self._ctd_array): - 182 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) - 183 supported_features = { - 184 "practicalsalinity": "salinity_00" - 185 } - 186 supported_methods = { - 187 "salinitydiff": Calculate.calculate_and_drop_salinity_spikes(self._ctd_array), - 188 } - 189 if feature in supported_features.keys(): - 190 if method in supported_methods.keys(): - 191 self._ctd_array.loc[self._ctd_array.index, 'salinity_00'] = supported_methods[method] - 192 else: - 193 print(f"clean: Invalid method \"{method}\" not in {supported_methods.keys()}") - 194 else: - 195 print(f"clean: Invalid feature \"{feature}\" not in {supported_features.keys()}.") - 196 if self.Utility.no_values_in_object(self._ctd_array): - 197 raise CTDError(self._filename, self._DATA_CLEANING_ERROR) - 198 - 199 def add_absolute_salinity(self): - 200 """ - 201 Calculates the absolute salinity using the TEOS-10 equations and adds it as a new column - 202 to the CTD data table. Removes rows with negative absolute salinity values. 
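# Hedged sketch of the TEOS-10 absolute-salinity step described above, using
# the gsw package directly. Deriving SA from practical salinity, sea pressure,
# and position mirrors what this class needs; whether
# Calculate.calculate_absolute_salinity does exactly this is an assumption.
import gsw
import pandas as pd

def absolute_salinity(df: pd.DataFrame) -> pd.Series:
    """Absolute Salinity (g/kg) from practical salinity via gsw.SA_from_SP."""
    return gsw.SA_from_SP(df['salinity_00'],     # practical salinity (PSU)
                          df['seapressure_00'],  # sea pressure (dbar)
                          df['longitude'],
                          df['latitude'])

# df['salinityabs'] = absolute_salinity(df)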
- 203 """ - 204 if self.Utility.no_values_in_object(self._ctd_array): - 205 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) - 206 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = Calculate.calculate_absolute_salinity( - 207 self._ctd_array) - 208 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) - 209 if self.Utility.no_values_in_object(self._ctd_array): - 210 raise CTDError(self._filename, self._SALINITYABS_CALCULATION_ERROR) - 211 - 212 def add_density(self): - 213 """ - 214 Calculates the density using the TEOS-10 equations and adds it as a new column to the CTD - 215 data table. If absolute salinity is not present, it is calculated first. - 216 """ - 217 if self.Utility.no_values_in_object(self._ctd_array): - 218 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) - 219 if 'salinityabs' in self._ctd_array.columns: - 220 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density( - 221 self._ctd_array) - 222 else: - 223 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = self.add_absolute_salinity() - 224 self._ctd_array = Calculate.calculate_absolute_density(self._ctd_array) - 225 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density( - 226 self._ctd_array) - 227 self._ctd_array.drop('salinityabs') - 228 if self.Utility.no_values_in_object(self._ctd_array): - 229 raise CTDError(self._filename, self._DENSITY_CALCULATION_ERROR) - 230 - 231 def add_overturns(self): - 232 """ - 233 Calculates density changes between consecutive measurements and identifies overturns where - 234 denser water lies above less dense water. Adds an 'overturn' column to the CTD data table. - 235 """ - 236 if self.Utility.no_values_in_object(self._ctd_array): - 237 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) - 238 self._ctd_array = Calculate.calculate_overturns(self._ctd_array.copy()) - 239 - 240 def add_mean_surface_density(self, start = 0.0, end = 100.0): - 241 """ - 242 Calculates the mean surface density from the density values and adds it as a new column - 243 to the CTD data table. - 244 - 245 Parameters - 246 ---------- - 247 start : float, optional - 248 Depth bound, defaults to 0. - 249 end : float, optional - 250 Depth bound, default to 1. - 251 """ - 252 if self.Utility.no_values_in_object(self._ctd_array): - 253 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) - 254 mean_surface_density = Calculate.calculate_mean_surface_density(self._ctd_array.copy(), (start, end)) - 255 self._ctd_array = self._ctd_array.assign(mean_surface_density=mean_surface_density) - 256 - 257 def add_mld(self, reference, method="default"): - 258 """ - 259 Calculates the mixed layer depth using the specified method and reference depth. - 260 Adds the MLD and the actual reference depth used as new columns to the CTD data table. - 261 - 262 Parameters - 263 ---------- - 264 reference : int - 265 The reference depth for MLD calculation. - 266 method : int - 267 The MLD calculation method (default: "default"). 
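# A sketch of one common MLD definition that fits the signature used above
# (density series, depth series, reference depth): the shallowest depth where
# density exceeds the density at the reference depth by a fixed threshold.
# The 0.03 kg/m^3 threshold is a conventional choice, not necessarily what
# Calculate.calculate_mld uses; density and depth are assumed index-aligned.
import pandas as pd

def mld_density_threshold(density: pd.Series, depth: pd.Series,
                          reference: float, delta: float = 0.03):
    """Return (MLD, actual reference depth used), or (None, None)."""
    below_ref = depth[depth >= reference]
    if below_ref.empty:
        return None, None
    ref_idx = below_ref.idxmin()           # shallowest sample at/below reference
    ref_depth = depth.loc[ref_idx]
    exceeds = density > density.loc[ref_idx] + delta
    candidates = depth[exceeds & (depth > ref_depth)]
    if candidates.empty:
        return None, None
    return float(candidates.min()), float(ref_depth)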
- 268 """ - 269 if self.Utility.no_values_in_object(self._ctd_array): - 270 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) - 271 copy_ctd_array = self._ctd_array.copy() - 272 supported_methods = [ - 273 "default" - 274 ] - 275 unpack = None - 276 - 277 if method == "default": - 278 unpack = Calculate.calculate_mld(copy_ctd_array['density'], copy_ctd_array['depth_00'], - 279 reference) - 280 else: - 281 print(f"add_mld: Invalid method \"{method}\" not in {supported_methods}") - 282 unpack = [None, None] - 283 if unpack is None: + 62 _cached_master_sheet = None + 63 master_sheet_path = "FjordPhyto MASTER SHEET.xlsx" + 64 _NO_SAMPLES_ERROR = "No samples in file." + 65 _NO_LOCATION_ERROR = "No location could be found." + 66 _DENSITY_CALCULATION_ERROR = "Could not calculate density on this dataset." + 67 _SALINITYABS_CALCULATION_ERROR = "Could not calculate density on this dataset." + 68 _DATA_CLEANING_ERROR = "No data remains after data cleaning, reverting to previous CTD" + 69 _REMOVE_NEGATIVES_ERROR = "No data remains after removing non-positive samples." + 70 _MLD_ERROR = "No data remains after calculating MLD." + 71 + 72 def __init__(self, rskfilepath): + 73 """ + 74 Initialize a new CTD object. + 75 + 76 Parameters + 77 ---------- + 78 rskfilepath : str + 79 The file path to the RSK file. + 80 """ + 81 self._rsk = RSK(rskfilepath) + 82 self._filename = ('_'.join(rskfilepath.split("/")[-1].split("_")[0:3]).split(".rsk")[0]) + 83 print("New CTDFjorder Object Created from : " + self._filename) + 84 self._ctd_array = np.array(self._rsk.npsamples()) + 85 self._ctd_array = pd.DataFrame(self._ctd_array) + 86 self.Utility = self.Utility(self._filename) + 87 self._cwd = _get_cwd() + 88 + 89 def view_table(self): + 90 """ + 91 Print the CTD data table. + 92 """ + 93 print(tabulate(self._ctd_array, headers='keys', tablefmt='psql')) + 94 + 95 def get_pandas_df(self , copy = True): + 96 """ + 97 Exposes the dataframe of the CTD object for custom processes. + 98 + 99 Parameters + 100 ---------- + 101 copy : bool, optional + 102 If True returns a copy, if False returns the actual DataFrame internal to the CTD object. Defaults to True. + 103 + 104 Returns + 105 ------- + 106 DataFrame + 107 The pandas df of the CTD object. + 108 """ + 109 return self._ctd_array.copy() if copy is True else self._ctd_array + 110 + 111 def add_filename_to_table(self): + 112 """ + 113 Add the filename to the CTD data table. + 114 """ + 115 self._ctd_array.assign(filename=self._filename) + 116 + 117 def remove_timezone_indicator(self): + 118 """ + 119 Removes the timezone indicator in the CTD data table 'timestamp' column. + 120 """ + 121 self._ctd_array = self.Utility.remove_sample_timezone_indicator(self._ctd_array) + 122 + 123 def add_location_to_table(self): + 124 """ + 125 Retrieves the sample location data from the RSK file and adds it to the CTD data table. + 126 If no location data is found, it attempts to estimate the location using the master sheet. 
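# Side note on the assign() calls in this class: pandas.DataFrame.assign
# returns a new DataFrame rather than mutating in place, so the result must be
# bound back, as add_location_to_table does (add_filename_to_table above
# discards its result). A two-line illustration:
import pandas as pd

df = pd.DataFrame({'depth_00': [1.0, 2.0]})
df.assign(filename='cast_001')        # result discarded; df is unchanged
df = df.assign(filename='cast_001')   # correct: rebind the returned frame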
+ 127 """ + 128 location_data = self.Utility.get_sample_location(self._rsk, self._filename) + 129 if self.Utility.no_values_in_object(self._ctd_array): + 130 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) + 131 try: + 132 self._ctd_array = self._ctd_array.assign(latitude=location_data[0], + 133 longitude=location_data[1], + 134 filename=location_data[2]) + 135 except Exception: + 136 self._ctd_array.loc['latitude'] = None + 137 self._ctd_array.loc['longitude'] = None + 138 self._ctd_array.loc['filename'] = None + 139 raise CTDError(self._filename, self._NO_LOCATION_ERROR) + 140 def remove_upcasts(self): + 141 """ + 142 Finds the global maximum depth of the sample, and filters out timestamps that occur before it. + 143 """ + 144 max_depth_index = self._ctd_array['depth_00'].idxmax() + 145 max_depth_timestamp = self._ctd_array.loc[max_depth_index, 'timestamp'] + 146 self._ctd_array = self._ctd_array[self._ctd_array['timestamp'] >=max_depth_timestamp] + 147 + 148 def remove_non_positive_samples(self): + 149 """ + 150 Iterates through the columns of the CTD data table and removes rows with non-positive values + 151 for depth, pressure, salinity, absolute salinity, or density. + 152 """ + 153 if self.Utility.no_values_in_object(self._ctd_array): + 154 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) + 155 for column in self._ctd_array.columns: + 156 match column: + 157 case 'depth_00': + 158 self._ctd_array = self.Utility.remove_rows_with_negative_depth(self._ctd_array) + 159 case 'pressure_00': + 160 self._ctd_array = self.Utility.remove_rows_with_negative_pressure(self._ctd_array) + 161 case 'salinity_00': + 162 self._ctd_array = self.Utility.remove_rows_with_negative_salinity(self._ctd_array) + 163 case 'salinityabs': + 164 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) + 165 case 'density': + 166 self._ctd_array = self.Utility.remove_rows_with_negative_density(self._ctd_array) + 167 if self.Utility.no_values_in_object(self._ctd_array): + 168 raise CTDError(self._filename, self._REMOVE_NEGATIVES_ERROR) + 169 + 170 def clean(self, feature, method='salinitydiff'): + 171 """ + 172 Applies complex data cleaning methods to the specified feature based on the selected method. + 173 Currently supports cleaning practical salinity using the 'salinitydiff' method. + 174 + 175 Parameters + 176 ---------- + 177 feature : str + 178 The feature to clean (e.g., 'practicalsalinity'). + 179 method : str, optional + 180 The cleaning method to apply, defaults to 'salinitydiff'. + 181 Options are 'salinitydiff'. 
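# Hedged sketch of a 'salinitydiff'-style cleaner: mask salinity samples whose
# sample-to-sample jump is implausibly large. The 0.5 PSU threshold and the
# plain first difference are illustrative assumptions; the actual
# Calculate.calculate_and_drop_salinity_spikes may differ.
import pandas as pd

def drop_salinity_spikes(salinity: pd.Series, max_jump: float = 0.5) -> pd.Series:
    """Replace values that jump more than max_jump from the previous sample with NaN."""
    jumps = salinity.diff().abs()
    return salinity.mask(jumps > max_jump)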
+ 182 """ + 183 if self.Utility.no_values_in_object(self._ctd_array): + 184 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) + 185 supported_features = { + 186 "practicalsalinity": "salinity_00" + 187 } + 188 supported_methods = { + 189 "salinitydiff": Calculate.calculate_and_drop_salinity_spikes(self._ctd_array), + 190 } + 191 if feature in supported_features.keys(): + 192 if method in supported_methods.keys(): + 193 self._ctd_array.loc[self._ctd_array.index, 'salinity_00'] = supported_methods[method] + 194 else: + 195 print(f"clean: Invalid method \"{method}\" not in {supported_methods.keys()}") + 196 else: + 197 print(f"clean: Invalid feature \"{feature}\" not in {supported_features.keys()}.") + 198 if self.Utility.no_values_in_object(self._ctd_array): + 199 raise CTDError(self._filename, self._DATA_CLEANING_ERROR) + 200 + 201 def add_absolute_salinity(self): + 202 """ + 203 Calculates the absolute salinity using the TEOS-10 equations and adds it as a new column + 204 to the CTD data table. Removes rows with negative absolute salinity values. + 205 """ + 206 if self.Utility.no_values_in_object(self._ctd_array): + 207 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) + 208 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = Calculate.calculate_absolute_salinity( + 209 self._ctd_array) + 210 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) + 211 if self.Utility.no_values_in_object(self._ctd_array): + 212 raise CTDError(self._filename, self._SALINITYABS_CALCULATION_ERROR) + 213 + 214 def add_density(self): + 215 """ + 216 Calculates the density using the TEOS-10 equations and adds it as a new column to the CTD + 217 data table. If absolute salinity is not present, it is calculated first. + 218 """ + 219 if self.Utility.no_values_in_object(self._ctd_array): + 220 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) + 221 if 'salinityabs' in self._ctd_array.columns: + 222 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density( + 223 self._ctd_array) + 224 else: + 225 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = self.add_absolute_salinity() + 226 self._ctd_array = Calculate.calculate_absolute_density(self._ctd_array) + 227 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density( + 228 self._ctd_array) + 229 self._ctd_array.drop('salinityabs') + 230 if self.Utility.no_values_in_object(self._ctd_array): + 231 raise CTDError(self._filename, self._DENSITY_CALCULATION_ERROR) + 232 + 233 def add_overturns(self): + 234 """ + 235 Calculates density changes between consecutive measurements and identifies overturns where + 236 denser water lies above less dense water. Adds an 'overturn' column to the CTD data table. + 237 """ + 238 if self.Utility.no_values_in_object(self._ctd_array): + 239 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) + 240 self._ctd_array = Calculate.calculate_overturns(self._ctd_array.copy()) + 241 + 242 def add_mean_surface_density(self, start = 0.0, end = 100.0): + 243 """ + 244 Calculates the mean surface density from the density values and adds it as a new column + 245 to the CTD data table. + 246 + 247 Parameters + 248 ---------- + 249 start : float, optional + 250 Depth bound, defaults to 0. + 251 end : float, optional + 252 Depth bound, default to 1. 
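# Sketch of the mean-surface-density step with the (start, end) depth bounds
# taken by Calculate.calculate_mean_surface_density above; the simple
# slice-and-mean shown here is an assumption about its internals.
import pandas as pd

def mean_surface_density(df: pd.DataFrame, bounds=(0.0, 100.0)) -> float:
    """Mean density over the depth window [start, end]."""
    start, end = bounds
    window = df[(df['depth_00'] >= start) & (df['depth_00'] <= end)]
    return float(window['density'].mean())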
+ 253 """ + 254 if self.Utility.no_values_in_object(self._ctd_array): + 255 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) + 256 mean_surface_density = Calculate.calculate_mean_surface_density(self._ctd_array.copy(), (start, end)) + 257 self._ctd_array = self._ctd_array.assign(mean_surface_density=mean_surface_density) + 258 + 259 def add_mld(self, reference, method="default"): + 260 """ + 261 Calculates the mixed layer depth using the specified method and reference depth. + 262 Adds the MLD and the actual reference depth used as new columns to the CTD data table. + 263 + 264 Parameters + 265 ---------- + 266 reference : int + 267 The reference depth for MLD calculation. + 268 method : int + 269 The MLD calculation method (default: "default"). + 270 """ + 271 if self.Utility.no_values_in_object(self._ctd_array): + 272 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) + 273 copy_ctd_array = self._ctd_array.copy() + 274 supported_methods = [ + 275 "default" + 276 ] + 277 unpack = None + 278 + 279 if method == "default": + 280 unpack = Calculate.calculate_mld(copy_ctd_array['density'], copy_ctd_array['depth_00'], + 281 reference) + 282 else: + 283 print(f"add_mld: Invalid method \"{method}\" not in {supported_methods}") 284 unpack = [None, None] - 285 raise CTDError("MLD could not be calculated.") - 286 MLD = unpack[0] - 287 depth_used_as_reference = unpack[1] - 288 self._ctd_array.loc[self._ctd_array.index, f'MLD {reference}'] = MLD - 289 self._ctd_array.loc[ - 290 self._ctd_array.index, f'MLD {reference} Actual Reference Depth'] = depth_used_as_reference - 291 self._ctd_array = copy_ctd_array.merge(self._ctd_array) - 292 if self.Utility.no_values_in_object(self._ctd_array): - 293 raise CTDError(self._filename, self._MLD_ERROR) - 294 - 295 def save_to_csv(self, output_file): - 296 """ - 297 Renames the columns of the CTD data table based on a predefined mapping and saves the - 298 data to the specified CSV file. If the file already exists, the data is appended to it. - 299 - 300 Parameters - 301 ---------- - 302 output_file : str - 303 The output CSV file path. - 304 """ - 305 rsk_labels = { - 306 "temperature_00": "Temperature (°C)", - 307 "pressure_00": "Pressure (dbar)", - 308 "chlorophyll_00": "Chlorophyll a (µg/l)", - 309 "seapressure_00": "Sea Pressure (dbar)", - 310 "depth_00": "Depth (m)", - 311 "salinity_00": "Salinity (PSU)", - 312 "speedofsound_00": "Speed of Sound (m/s)", - 313 "specificconductivity_00": "Specific Conductivity (µS/cm)", - 314 "conductivity_00": "Conductivity (mS/cm)", - 315 "density": "Density (kg/m^3) Derived", - 316 "salinityabs": "Absolute Salinity (g/kg) Derived", - 317 "MLD_Zero": "MLD Zero (m) Derived", - 318 "MLD_Ten": "MLD Ten (m) Derived", - 319 "stratification": "Stratification (J/m^2) Derived", - 320 "mean_surface_density": "Mean Surface Density (kg/m^3) Derived", - 321 "overturn": "Overturn (Δρ < -0.05)" - 322 } - 323 # Renaming columns - 324 data = self._ctd_array.copy() - 325 if 'filename' in data.columns: - 326 data = data[[col for col in data.columns if col != 'filename'] + ['filename']] - 327 for key, new_column_name in rsk_labels.items(): - 328 if key in data.columns: - 329 data = data.rename(columns={key: new_column_name}) - 330 data.reset_index(inplace=True, drop=True) - 331 try: - 332 csv_df = pd.read_csv(str(output_file)) - 333 except FileNotFoundError: - 334 print(f"Error: The file {output_file} does not exist. 
A new file will be created.") - 335 csv_df = pd.DataFrame() # If file does not exist, create an empty DataFrame - 336 - 337 # Merge the existing DataFrame with the new DataFrame - 338 merged_df = pd.concat([csv_df, data], ignore_index=True) - 339 - 340 # Overwrite the original CSV file with the merged DataFrame - 341 merged_df.to_csv(output_file, index=False) - 342 - 343 return merged_df + 285 if unpack is None: + 286 unpack = [None, None] + 287 raise CTDError("MLD could not be calculated.") + 288 MLD = unpack[0] + 289 depth_used_as_reference = unpack[1] + 290 self._ctd_array.loc[self._ctd_array.index, f'MLD {reference}'] = MLD + 291 self._ctd_array.loc[ + 292 self._ctd_array.index, f'MLD {reference} Actual Reference Depth'] = depth_used_as_reference + 293 self._ctd_array = copy_ctd_array.merge(self._ctd_array) + 294 if self.Utility.no_values_in_object(self._ctd_array): + 295 raise CTDError(self._filename, self._MLD_ERROR) + 296 + 297 def save_to_csv(self, output_file): + 298 """ + 299 Renames the columns of the CTD data table based on a predefined mapping and saves the + 300 data to the specified CSV file. If the file already exists, the data is appended to it. + 301 + 302 Parameters + 303 ---------- + 304 output_file : str + 305 The output CSV file path. + 306 """ + 307 rsk_labels = { + 308 "temperature_00": "Temperature (°C)", + 309 "pressure_00": "Pressure (dbar)", + 310 "chlorophyll_00": "Chlorophyll a (µg/l)", + 311 "seapressure_00": "Sea Pressure (dbar)", + 312 "depth_00": "Depth (m)", + 313 "salinity_00": "Salinity (PSU)", + 314 "speedofsound_00": "Speed of Sound (m/s)", + 315 "specificconductivity_00": "Specific Conductivity (µS/cm)", + 316 "conductivity_00": "Conductivity (mS/cm)", + 317 "density": "Density (kg/m^3) Derived", + 318 "salinityabs": "Absolute Salinity (g/kg) Derived", + 319 "MLD_Zero": "MLD Zero (m) Derived", + 320 "MLD_Ten": "MLD Ten (m) Derived", + 321 "stratification": "Stratification (J/m^2) Derived", + 322 "mean_surface_density": "Mean Surface Density (kg/m^3) Derived", + 323 "overturn": "Overturn (Δρ < -0.05)" + 324 } + 325 # Renaming columns + 326 data = self._ctd_array.copy() + 327 if 'filename' in data.columns: + 328 data = data[[col for col in data.columns if col != 'filename'] + ['filename']] + 329 for key, new_column_name in rsk_labels.items(): + 330 if key in data.columns: + 331 data = data.rename(columns={key: new_column_name}) + 332 data.reset_index(inplace=True, drop=True) + 333 try: + 334 csv_df = pd.read_csv(str(output_file)) + 335 except FileNotFoundError: + 336 print(f"Error: The file {output_file} does not exist. A new file will be created.") + 337 csv_df = pd.DataFrame() # If file does not exist, create an empty DataFrame + 338 + 339 # Merge the existing DataFrame with the new DataFrame + 340 merged_df = pd.concat([csv_df, data], ignore_index=True) + 341 + 342 # Overwrite the original CSV file with the merged DataFrame + 343 merged_df.to_csv(output_file, index=False) 344 - 345 def plot_depth_salinity_density_mld_line(self): - 346 """ - 347 Generates a plot of depth vs. salinity and density, applying LOESS smoothing to the data. - 348 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. - 349 Saves the plot as an image file. 
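# The append behaviour of save_to_csv boils down to this read-concat-write
# pattern; a compact standalone version (output path is illustrative):
import pandas as pd

def append_to_csv(data: pd.DataFrame, output_file: str) -> pd.DataFrame:
    try:
        existing = pd.read_csv(output_file)
    except FileNotFoundError:
        existing = pd.DataFrame()   # first write: start from an empty frame
    merged = pd.concat([existing, data], ignore_index=True)
    merged.to_csv(output_file, index=False)
    return merged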
- 350 """ - 351 df = self._ctd_array.copy() - 352 filename = self._filename - 353 plt.rcParams.update({'font.size': 16}) - 354 df_filtered = df - 355 if df_filtered.isnull().values.any(): - 356 df_filtered.dropna(inplace=True) # Drop rows with NaNs - 357 df_filtered = df_filtered.reset_index(drop=True) - 358 if len(df_filtered) < 1: - 359 return - 360 fig, ax1 = plt.subplots(figsize=(18, 18)) - 361 ax1.invert_yaxis() - 362 # Dynamically set y-axis limits based on depth data - 363 max_depth = df_filtered['depth_00'].max() - 364 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards - 365 lowess = statsmodels.api.nonparametric.lowess - 366 salinity_lowess = lowess(df_filtered['salinity_00'], df_filtered['depth_00'], frac=0.1) - 367 salinity_depths, salinity_smooth = zip(*salinity_lowess) - 368 color_salinity = 'tab:blue' - 369 ax1.plot(salinity_smooth, salinity_depths, color=color_salinity, label='Practical Salinity') - 370 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) - 371 ax1.set_ylabel('Depth (m)') - 372 ax1.tick_params(axis='x', labelcolor=color_salinity) - 373 density_lowess = lowess(df_filtered['density'], df_filtered['depth_00'], frac=0.1) - 374 density_depths, density_smooth = zip(*density_lowess) - 375 ax2 = ax1.twiny() - 376 color_density = 'tab:red' - 377 ax2.plot(density_smooth, density_depths, color=color_density, label='Density (kg/m^3)') - 378 ax2.set_xlabel('Density (kg/m^3)', color=color_density) - 379 ax2.tick_params(axis='x', labelcolor=color_density) - 380 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) - 381 mld_cols = [] - 382 for col in df.columns: - 383 if 'MLD' in col and 'Actual' not in col: - 384 mld_cols.append(df[col]) - 385 refdepth_cols = [] - 386 for col in df.columns: - 387 if 'Actual' in col: - 388 refdepth_cols.append(df[col]) - 389 for idx, mld_col in enumerate(mld_cols): - 390 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', - 391 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') - 392 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', - 393 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) - 394 if df_filtered['overturn'].any(): - 395 plt.title( - 396 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform " - 397 f"\n THIS IS AN UNSTABLE WATER COLUMN " - 398 f"\n(Higher density fluid lies above lower density fluid)") - 399 else: - 400 plt.title( - 401 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform " - 402 f"\n THIS IS AN UNSTABLE WATER COLUMN " - 403 f"\n(Higher density fluid lies above lower density fluid)") - 404 ax1.grid(True) - 405 lines, labels = ax1.get_legend_handles_labels() - 406 ax2_legend = ax2.get_legend_handles_labels() - 407 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='lower center', bbox_to_anchor=(0.5, -0.15), - 408 ncol=3) - 409 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes_line.png") - 410 plot_folder = os.path.join(self._cwd, "plots") - 411 if not (os.path.isdir(plot_folder)): - 412 os.mkdir(plot_folder) - 413 plt.savefig(plot_path) - 414 plt.close(fig) - 415 - 416 def plot_depth_density_salinity_mld_scatter(self): - 417 """ - 418 Generates a scatter plot of depth vs. salinity and density. - 419 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. - 420 Saves the plot as an image file. 
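# The LOESS smoothing used by the line plot, isolated: statsmodels' lowess
# takes the response first and returns (x_sorted, y_smoothed) pairs.
# frac=0.1 matches the fraction used above; the profile data is synthetic.
import numpy as np
import statsmodels.api as sm

depth = np.linspace(0, 100, 200)
salinity = 34 + 0.01 * depth + np.random.default_rng(0).normal(0, 0.05, 200)

smoothed = sm.nonparametric.lowess(salinity, depth, frac=0.1)
depth_s, salinity_s = smoothed[:, 0], smoothed[:, 1]  # sorted depth, smoothed salinity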
- 421 """ - 422 df = self._ctd_array.copy() - 423 filename = self._filename - 424 plt.rcParams.update({'font.size': 16}) - 425 df_filtered = df - 426 if df_filtered.empty: - 427 plt.close() - 428 return - 429 df_filtered = df_filtered.reset_index(drop=True) - 430 fig, ax1 = plt.subplots(figsize=(18, 18)) - 431 ax1.invert_yaxis() - 432 # Dynamically set y-axis limits based on depth data - 433 max_depth = df_filtered['depth_00'].max() - 434 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards - 435 color_salinity = 'tab:blue' - 436 ax1.scatter(df_filtered['salinity_00'], df_filtered['depth_00'], color=color_salinity, - 437 label='Practical Salinity') - 438 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) - 439 ax1.set_ylabel('Depth (m)') - 440 ax1.tick_params(axis='x', labelcolor=color_salinity) - 441 ax2 = ax1.twiny() - 442 color_density = 'tab:red' - 443 ax2.scatter(df_filtered['density'], df_filtered['depth_00'], color=color_density, label='Density (kg/m^3)') - 444 ax2.set_xlabel('Density (kg/m^3)', color=color_density) - 445 ax2.tick_params(axis='x', labelcolor=color_density) - 446 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) - 447 mld_cols = [] - 448 for col in df.columns: - 449 if 'MLD' in col and 'Actual' not in col: - 450 mld_cols.append(df[col]) - 451 refdepth_cols = [] - 452 for col in df.columns: - 453 if 'Actual' in col: - 454 refdepth_cols.append(df[col]) - 455 for idx, mld_col in enumerate(mld_cols): - 456 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', - 457 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') - 458 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', - 459 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) - 460 if df_filtered['overturn'].any(): - 461 plt.title( - 462 f"{filename}\n Depth vs. Salinity and Density " - 463 f"\n THIS IS AN UNSTABLE WATER COLUMN " - 464 f"\n(Higher density fluid lies above lower density fluid)") - 465 else: - 466 plt.title( - 467 f"{filename}\n Depth vs. Salinity and Density " - 468 f"\n THIS IS AN UNSTABLE WATER COLUMN " - 469 f"\n(Higher density fluid lies above lower density fluid)") - 470 ax1.grid(True) - 471 lines, labels = ax1.get_legend_handles_labels() - 472 ax2_legend = ax2.get_legend_handles_labels() - 473 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='upper center', bbox_to_anchor=(0.5, -0.15), - 474 ncol=3) - 475 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes.png") - 476 plot_folder = os.path.join(self._cwd, "plots") - 477 if not (os.path.isdir(plot_folder)): - 478 os.mkdir(plot_folder) - 479 plt.savefig(plot_path) - 480 plt.close(fig) - 481 - 482 def plot_depth_temperature_scatter(self): - 483 """ - 484 Generates a scatter plot of depth vs. temperature. - 485 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. - 486 Saves the plot as an image file. 
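# Minimal version of the dual-x-axis layout shared by the plotting methods:
# one inverted depth axis, salinity on the bottom x-axis, density on a
# twiny() axis on top. Data values are placeholders.
import matplotlib.pyplot as plt
import numpy as np

depth = np.linspace(0, 50, 50)
salinity = 33 + depth * 0.02
density = 1026 + depth * 0.01

fig, ax1 = plt.subplots()
ax1.set_ylim(depth.max(), 0)                 # depth increases downward
ax1.plot(salinity, depth, color='tab:blue')
ax1.set_xlabel('Practical Salinity (PSU)', color='tab:blue')
ax1.set_ylabel('Depth (m)')
ax2 = ax1.twiny()                            # independent top x-axis
ax2.plot(density, depth, color='tab:red')
ax2.set_xlabel('Density (kg/m^3)', color='tab:red')
fig.savefig('example_profile.png')
plt.close(fig)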
- 487 """ - 488 df = self._ctd_array.copy() - 489 filename = self._filename - 490 plt.rcParams.update({'font.size': 16}) - 491 df_filtered = df - 492 if df_filtered.empty: - 493 plt.close() - 494 return - 495 df_filtered = df_filtered.reset_index(drop=True) - 496 fig, ax1 = plt.subplots(figsize=(18, 18)) - 497 ax1.invert_yaxis() - 498 # Dynamically set y-axis limits based on depth data - 499 max_depth = df_filtered['depth_00'].max() - 500 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards - 501 - 502 color_temp = 'tab:blue' - 503 ax1.scatter(df_filtered['temperature_00'], df_filtered['depth_00'], color=color_temp, - 504 label="Temperature (°C)") - 505 ax1.set_xlabel("Temperature (°C)", color=color_temp) - 506 ax1.set_ylabel('Depth (m)') - 507 ax1.tick_params(axis='x', labelcolor=color_temp) - 508 mld_cols = [] - 509 for col in df.columns: - 510 if "MLD" in col and "Actual" not in col: - 511 mld_cols.append(df[col]) - 512 refdepth_cols = [] - 513 for col in df.columns: - 514 if "Reference Depth" in col: - 515 refdepth_cols.append(df[col]) - 516 for idx, mld_col in enumerate(mld_cols): - 517 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', - 518 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') - 519 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', - 520 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) - 521 if df_filtered['overturn'].any(): - 522 plt.title( - 523 f"{filename}\n Depth vs. Temperature \n " - 524 f"THIS IS AN UNSTABLE WATER COLUMN \n" - 525 f"(Higher density fluid lies above lower density fluid)") - 526 else: - 527 plt.title( - 528 f"{filename}\n Depth vs. Temperature \n " - 529 f"THIS IS AN UNSTABLE WATER COLUMN \n" - 530 f"(Higher density fluid lies above lower density fluid)") - 531 ax1.grid(True) - 532 lines, labels = ax1.get_legend_handles_labels() - 533 ax1.legend(lines, labels, loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=3) - 534 plot_path = os.path.join(self._cwd, f"plots/{filename}_temperature_depth_plot.png") - 535 plot_folder = os.path.join(self._cwd, "plots") - 536 if not (os.path.isdir(plot_folder)): - 537 os.mkdir(plot_folder) - 538 plt.savefig(plot_path) - 539 plt.close(fig) - 540 - 541 class Utility: - 542 """ - 543 Utility - 544 -------- - 545 Utility class for CTD data processing. - 546 - 547 Attributes - 548 ---------- - 549 filename : str - 550 Filename of the RSK file. - 551 mastersheet : str - 552 Path to the master sheet Excel file. - 553 """ - 554 - 555 def __init__(self, filename): - 556 """ - 557 Initialize a new Utility object. - 558 Parameters - 559 ---------- - 560 filename : str - 561 The filename of the RSK file. - 562 """ - 563 self.filename = filename - 564 self.mastersheet = os.path.join(_get_cwd(), CTD.master_sheet_path) - 565 - 566 def no_values_in_object(self, object_to_check): - 567 """ - 568 Checks if the given object is None, empty, or has a length greater than 0. - 569 Returns True if the object has no values, False otherwise. - 570 - 571 Parameters - 572 ---------- - 573 object_to_check : object - 574 The object to check for values. - 575 Returns - 576 -------- - 577 bool - 578 True if the object has no values, False otherwise. - 579 """ - 580 if isinstance(object_to_check, type(None)): - 581 return True - 582 if object_to_check.empty: + 345 return merged_df + 346 + 347 def plot_depth_salinity_density_mld_line(self): + 348 """ + 349 Generates a plot of depth vs. 
salinity and density, applying LOESS smoothing to the data. + 350 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. + 351 Saves the plot as an image file. + 352 """ + 353 df = self._ctd_array.copy() + 354 filename = self._filename + 355 plt.rcParams.update({'font.size': 16}) + 356 df_filtered = df + 357 if df_filtered.isnull().values.any(): + 358 df_filtered.dropna(inplace=True) # Drop rows with NaNs + 359 df_filtered = df_filtered.reset_index(drop=True) + 360 if len(df_filtered) < 1: + 361 return + 362 fig, ax1 = plt.subplots(figsize=(18, 18)) + 363 ax1.invert_yaxis() + 364 # Dynamically set y-axis limits based on depth data + 365 max_depth = df_filtered['depth_00'].max() + 366 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards + 367 lowess = statsmodels.api.nonparametric.lowess + 368 salinity_lowess = lowess(df_filtered['salinity_00'], df_filtered['depth_00'], frac=0.1) + 369 salinity_depths, salinity_smooth = zip(*salinity_lowess) + 370 color_salinity = 'tab:blue' + 371 ax1.plot(salinity_smooth, salinity_depths, color=color_salinity, label='Practical Salinity') + 372 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) + 373 ax1.set_ylabel('Depth (m)') + 374 ax1.tick_params(axis='x', labelcolor=color_salinity) + 375 density_lowess = lowess(df_filtered['density'], df_filtered['depth_00'], frac=0.1) + 376 density_depths, density_smooth = zip(*density_lowess) + 377 ax2 = ax1.twiny() + 378 color_density = 'tab:red' + 379 ax2.plot(density_smooth, density_depths, color=color_density, label='Density (kg/m^3)') + 380 ax2.set_xlabel('Density (kg/m^3)', color=color_density) + 381 ax2.tick_params(axis='x', labelcolor=color_density) + 382 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) + 383 mld_cols = [] + 384 for col in df.columns: + 385 if 'MLD' in col and 'Actual' not in col: + 386 mld_cols.append(df[col]) + 387 refdepth_cols = [] + 388 for col in df.columns: + 389 if 'Actual' in col: + 390 refdepth_cols.append(df[col]) + 391 for idx, mld_col in enumerate(mld_cols): + 392 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', + 393 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') + 394 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', + 395 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) + 396 if df_filtered['overturn'].any(): + 397 plt.title( + 398 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform " + 399 f"\n THIS IS AN UNSTABLE WATER COLUMN " + 400 f"\n(Higher density fluid lies above lower density fluid)") + 401 else: + 402 plt.title( + 403 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform " + 404 f"\n THIS IS AN UNSTABLE WATER COLUMN " + 405 f"\n(Higher density fluid lies above lower density fluid)") + 406 ax1.grid(True) + 407 lines, labels = ax1.get_legend_handles_labels() + 408 ax2_legend = ax2.get_legend_handles_labels() + 409 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='lower center', bbox_to_anchor=(0.5, -0.15), + 410 ncol=3) + 411 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes_line.png") + 412 plot_folder = os.path.join(self._cwd, "plots") + 413 if not (os.path.isdir(plot_folder)): + 414 os.mkdir(plot_folder) + 415 plt.savefig(plot_path) + 416 plt.close(fig) + 417 + 418 def plot_depth_density_salinity_mld_scatter(self): + 419 """ + 420 Generates a scatter plot of depth vs. salinity and density. 
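# Side note on the plot-folder handling repeated in these methods: the
# isdir/mkdir pair can race if two processes plot at once; os.makedirs with
# exist_ok=True is the usual single-call alternative.
import os

plot_folder = os.path.join(os.getcwd(), 'plots')
os.makedirs(plot_folder, exist_ok=True)   # no error if the folder already exists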
+ 421 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. + 422 Saves the plot as an image file. + 423 """ + 424 df = self._ctd_array.copy() + 425 filename = self._filename + 426 plt.rcParams.update({'font.size': 16}) + 427 df_filtered = df + 428 if df_filtered.empty: + 429 plt.close() + 430 return + 431 df_filtered = df_filtered.reset_index(drop=True) + 432 fig, ax1 = plt.subplots(figsize=(18, 18)) + 433 ax1.invert_yaxis() + 434 # Dynamically set y-axis limits based on depth data + 435 max_depth = df_filtered['depth_00'].max() + 436 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards + 437 color_salinity = 'tab:blue' + 438 ax1.scatter(df_filtered['salinity_00'], df_filtered['depth_00'], color=color_salinity, + 439 label='Practical Salinity') + 440 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) + 441 ax1.set_ylabel('Depth (m)') + 442 ax1.tick_params(axis='x', labelcolor=color_salinity) + 443 ax2 = ax1.twiny() + 444 color_density = 'tab:red' + 445 ax2.scatter(df_filtered['density'], df_filtered['depth_00'], color=color_density, label='Density (kg/m^3)') + 446 ax2.set_xlabel('Density (kg/m^3)', color=color_density) + 447 ax2.tick_params(axis='x', labelcolor=color_density) + 448 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) + 449 mld_cols = [] + 450 for col in df.columns: + 451 if 'MLD' in col and 'Actual' not in col: + 452 mld_cols.append(df[col]) + 453 refdepth_cols = [] + 454 for col in df.columns: + 455 if 'Actual' in col: + 456 refdepth_cols.append(df[col]) + 457 for idx, mld_col in enumerate(mld_cols): + 458 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', + 459 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') + 460 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', + 461 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) + 462 if df_filtered['overturn'].any(): + 463 plt.title( + 464 f"{filename}\n Depth vs. Salinity and Density " + 465 f"\n THIS IS AN UNSTABLE WATER COLUMN " + 466 f"\n(Higher density fluid lies above lower density fluid)") + 467 else: + 468 plt.title( + 469 f"{filename}\n Depth vs. Salinity and Density " + 470 f"\n THIS IS AN UNSTABLE WATER COLUMN " + 471 f"\n(Higher density fluid lies above lower density fluid)") + 472 ax1.grid(True) + 473 lines, labels = ax1.get_legend_handles_labels() + 474 ax2_legend = ax2.get_legend_handles_labels() + 475 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='upper center', bbox_to_anchor=(0.5, -0.15), + 476 ncol=3) + 477 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes.png") + 478 plot_folder = os.path.join(self._cwd, "plots") + 479 if not (os.path.isdir(plot_folder)): + 480 os.mkdir(plot_folder) + 481 plt.savefig(plot_path) + 482 plt.close(fig) + 483 + 484 def plot_depth_temperature_scatter(self): + 485 """ + 486 Generates a scatter plot of depth vs. temperature. + 487 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. + 488 Saves the plot as an image file. 
+ 489 """ + 490 df = self._ctd_array.copy() + 491 filename = self._filename + 492 plt.rcParams.update({'font.size': 16}) + 493 df_filtered = df + 494 if df_filtered.empty: + 495 plt.close() + 496 return + 497 df_filtered = df_filtered.reset_index(drop=True) + 498 fig, ax1 = plt.subplots(figsize=(18, 18)) + 499 ax1.invert_yaxis() + 500 # Dynamically set y-axis limits based on depth data + 501 max_depth = df_filtered['depth_00'].max() + 502 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards + 503 + 504 color_temp = 'tab:blue' + 505 ax1.scatter(df_filtered['temperature_00'], df_filtered['depth_00'], color=color_temp, + 506 label="Temperature (°C)") + 507 ax1.set_xlabel("Temperature (°C)", color=color_temp) + 508 ax1.set_ylabel('Depth (m)') + 509 ax1.tick_params(axis='x', labelcolor=color_temp) + 510 mld_cols = [] + 511 for col in df.columns: + 512 if "MLD" in col and "Actual" not in col: + 513 mld_cols.append(df[col]) + 514 refdepth_cols = [] + 515 for col in df.columns: + 516 if "Reference Depth" in col: + 517 refdepth_cols.append(df[col]) + 518 for idx, mld_col in enumerate(mld_cols): + 519 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', + 520 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') + 521 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', + 522 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) + 523 if df_filtered['overturn'].any(): + 524 plt.title( + 525 f"{filename}\n Depth vs. Temperature \n " + 526 f"THIS IS AN UNSTABLE WATER COLUMN \n" + 527 f"(Higher density fluid lies above lower density fluid)") + 528 else: + 529 plt.title( + 530 f"{filename}\n Depth vs. Temperature \n " + 531 f"THIS IS AN UNSTABLE WATER COLUMN \n" + 532 f"(Higher density fluid lies above lower density fluid)") + 533 ax1.grid(True) + 534 lines, labels = ax1.get_legend_handles_labels() + 535 ax1.legend(lines, labels, loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=3) + 536 plot_path = os.path.join(self._cwd, f"plots/{filename}_temperature_depth_plot.png") + 537 plot_folder = os.path.join(self._cwd, "plots") + 538 if not (os.path.isdir(plot_folder)): + 539 os.mkdir(plot_folder) + 540 plt.savefig(plot_path) + 541 plt.close(fig) + 542 + 543 class Utility: + 544 """ + 545 Utility + 546 -------- + 547 Utility class for CTD data processing. + 548 + 549 Attributes + 550 ---------- + 551 filename : str + 552 Filename of the RSK file. + 553 mastersheet : str + 554 Path to the master sheet Excel file. + 555 """ + 556 + 557 def __init__(self, filename): + 558 """ + 559 Initialize a new Utility object. + 560 Parameters + 561 ---------- + 562 filename : str + 563 The filename of the RSK file. + 564 """ + 565 self.filename = filename + 566 self.mastersheet = os.path.join(_get_cwd(), CTD.master_sheet_path) + 567 + 568 def no_values_in_object(self, object_to_check): + 569 """ + 570 Checks if the given object is None, empty, or has a length greater than 0. + 571 Returns True if the object has no values, False otherwise. + 572 + 573 Parameters + 574 ---------- + 575 object_to_check : object + 576 The object to check for values. + 577 Returns + 578 -------- + 579 bool + 580 True if the object has no values, False otherwise. 
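# The predicate below is an explicit, always-boolean version of the check this
# helper performs (None, a pandas .empty object, or zero length all count as
# "no values"); it assumes the argument is None or a sized container.
def has_no_values(obj) -> bool:
    if obj is None:
        return True
    if getattr(obj, 'empty', False):   # pandas objects expose .empty
        return True
    return len(obj) == 0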
+ 581 """ + 582 if isinstance(object_to_check, type(None)): 583 return True - 584 if len(object_to_check) > 0: - 585 return False - 586 - 587 def process_master_sheet(self, master_sheet_path, filename): - 588 """ - 589 Extracts the date and time components from the filename and compares them with the data - 590 in the master sheet. Calculates the absolute differences between the dates and times to - 591 find the closest match. Returns the estimated latitude, longitude, and updated filename - 592 based on the closest match. - 593 - 594 Parameters - 595 ---------- - 596 master_sheet_path : str - 597 The path to the master sheet Excel file. - 598 - 599 filename : str - 600 The filename of the RSK file. - 601 - 602 Returns - 603 ------- - 604 tuple - 605 A tuple containing the estimated latitude, longitude, and updated filename. - 606 """ - 607 - 608 def get_date_from_string(filename): - 609 try: - 610 year = filename.split('_')[1][:4] - 611 month = filename.split('_')[1][4:6] - 612 day = filename.split('_')[1][6:] - 613 hour = filename.split('_')[2][0:2] - 614 minute = filename.split('_')[2][2:4] - 615 time = f"{hour}:{minute}" - 616 return float(year), float(month), float(day), time - 617 except: - 618 return None, None, None, None - 619 - 620 # Function to calculate the absolute difference between two dates - 621 def date_difference(row, target_year, target_month, target_day): - 622 return abs(row['year'] - target_year) + abs(row['month'] - target_month) + abs( - 623 row['day'] - target_day) - 624 - 625 # Function to calculate the absolute difference between two times - 626 def time_difference(target_time, df_time): - 627 df_time_str = str(df_time) - 628 target_hour, target_minute = [int(target_time.split(':')[0]), int(target_time.split(':')[1])] - 629 try: - 630 df_hour, df_minute = [int(df_time_str.split(':')[0]), int(df_time_str.split(':')[1])] - 631 except: - 632 return None - 633 return abs((target_hour * 60 + target_minute) - (df_hour * 60 + df_minute)) - 634 - 635 # Load the master sheet - 636 master_df = pd.read_excel(master_sheet_path) - 637 # Get date and time components from the filename - 638 year, month, day, time = get_date_from_string(filename) - 639 if year is None: - 640 return - 641 # Calculate absolute differences for each row in 'master_df' - 642 master_df['date_difference'] = master_df.apply(date_difference, args=(year, month, day), axis=1) - 643 master_df['time_difference'] = master_df['time_local'].apply(lambda x: time_difference(time, x)) - 644 # Find the rows with the smallest total difference for date - 645 smallest_date_difference = master_df['date_difference'].min() - 646 closest_date_rows = master_df[master_df['date_difference'] == smallest_date_difference] - 647 # Check if time_difference returns None - 648 if closest_date_rows['time_difference'].isnull().any(): - 649 closest_time_time = None - 650 closest_row_overall = closest_date_rows.iloc[0] - 651 else: - 652 # If there are multiple rows with the smallest date difference, select the row with the smallest time difference - 653 if len(closest_date_rows) > 1: - 654 closest_time_row = closest_date_rows.loc[closest_date_rows['time_difference'].idxmin()] - 655 closest_row_overall = closest_time_row - 656 closest_time_time = closest_row_overall['time_local'] - 657 else: - 658 closest_row_overall = closest_date_rows.iloc[0] - 659 closest_time_time = closest_row_overall['time_local'] - 660 latitude = closest_row_overall['latitude'] - 661 longitude = closest_row_overall['longitude'] - 662 unique_id = 
closest_row_overall.iloc[0] - 663 RBRfilename = filename + "_gpscm" - 664 # Access the closest date components - 665 closest_date_year = closest_row_overall['year'] - 666 closest_date_month = closest_row_overall['month'] - 667 closest_date_day = closest_row_overall['day'] - 668 # Print the closest date and time - 669 print("|-ESTIMATION ALERT-|") - 670 print("Had to guess location on file: " + filename) - 671 print("Unique ID: " + unique_id) - 672 print("Closest Date (Year, Month, Day):", closest_date_year, closest_date_month, closest_date_day) - 673 print("Lat: " + str(latitude)) - 674 print("Long: " + str(longitude)) - 675 if closest_time_time: - 676 print("Closest Time:", closest_time_time) - 677 print("====================") - 678 return latitude, longitude, RBRfilename - 679 - 680 def get_sample_location(self, rsk, filename): - 681 """ - 682 Retrieves the sample location data from the RSK file. If no location data is found, - 683 it attempts to estimate the location using the master sheet. Returns the latitude, - 684 longitude, and updated filename. - 685 - 686 Parameters - 687 ---------- - 688 rsk : RSK - 689 Ruskin object of the RSK file. - 690 filename : str - 691 The filename of the RSK file. + 584 if object_to_check.empty: + 585 return True + 586 if len(object_to_check) > 0: + 587 return False + 588 + 589 def process_master_sheet(self, master_sheet_path, filename): + 590 """ + 591 Extracts the date and time components from the filename and compares them with the data + 592 in the master sheet. Calculates the absolute differences between the dates and times to + 593 find the closest match. Returns the estimated latitude, longitude, and updated filename + 594 based on the closest match. + 595 + 596 Parameters + 597 ---------- + 598 master_sheet_path : str + 599 The path to the master sheet Excel file. + 600 + 601 filename : str + 602 The filename of the RSK file. + 603 + 604 Returns + 605 ------- + 606 tuple + 607 A tuple containing the estimated latitude, longitude, and updated filename. 
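# The date/time matching below can also be expressed with real timestamps:
# assemble one datetime per master-sheet row, then take the row whose absolute
# offset from the cast time is smallest. Column names follow the master sheet
# used above; this is an illustrative alternative, not the shipped code.
import pandas as pd

def closest_master_row(master_df: pd.DataFrame, cast_time: pd.Timestamp) -> pd.Series:
    t = pd.to_datetime(master_df['time_local'].astype(str), errors='coerce')
    stamped = pd.to_datetime(
        dict(year=master_df['year'], month=master_df['month'],
             day=master_df['day'], hour=t.dt.hour, minute=t.dt.minute),
        errors='coerce')
    return master_df.loc[(stamped - cast_time).abs().idxmin()]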
+ 608 """ + 609 + 610 def get_date_from_string(filename): + 611 try: + 612 year = filename.split('_')[1][:4] + 613 month = filename.split('_')[1][4:6] + 614 day = filename.split('_')[1][6:] + 615 hour = filename.split('_')[2][0:2] + 616 minute = filename.split('_')[2][2:4] + 617 time = f"{hour}:{minute}" + 618 return float(year), float(month), float(day), time + 619 except: + 620 return None, None, None, None + 621 + 622 # Function to calculate the absolute difference between two dates + 623 def date_difference(row, target_year, target_month, target_day): + 624 return abs(row['year'] - target_year) + abs(row['month'] - target_month) + abs( + 625 row['day'] - target_day) + 626 + 627 # Function to calculate the absolute difference between two times + 628 def time_difference(target_time, df_time): + 629 df_time_str = str(df_time) + 630 target_hour, target_minute = [int(target_time.split(':')[0]), int(target_time.split(':')[1])] + 631 try: + 632 df_hour, df_minute = [int(df_time_str.split(':')[0]), int(df_time_str.split(':')[1])] + 633 except: + 634 return None + 635 return abs((target_hour * 60 + target_minute) - (df_hour * 60 + df_minute)) + 636 + 637 # Check if the master sheet is already cached + 638 if CTD._cached_master_sheet is None: + 639 # Load the master sheet and cache it + 640 CTD._cached_master_sheet = pd.read_excel(master_sheet_path) + 641 + 642 # Use the cached master sheet data + 643 master_df = CTD._cached_master_sheet.copy() + 644 # Get date and time components from the filename + 645 year, month, day, time = get_date_from_string(filename) + 646 if year is None: + 647 return + 648 # Calculate absolute differences for each row in 'master_df' + 649 master_df['date_difference'] = master_df.apply(date_difference, args=(year, month, day), axis=1) + 650 master_df['time_difference'] = master_df['time_local'].apply(lambda x: time_difference(time, x)) + 651 # Find the rows with the smallest total difference for date + 652 smallest_date_difference = master_df['date_difference'].min() + 653 closest_date_rows = master_df[master_df['date_difference'] == smallest_date_difference] + 654 # Check if time_difference returns None + 655 if closest_date_rows['time_difference'].isnull().any(): + 656 closest_time_time = None + 657 closest_row_overall = closest_date_rows.iloc[0] + 658 else: + 659 # If there are multiple rows with the smallest date difference, select the row with the smallest time difference + 660 if len(closest_date_rows) > 1: + 661 closest_time_row = closest_date_rows.loc[closest_date_rows['time_difference'].idxmin()] + 662 closest_row_overall = closest_time_row + 663 closest_time_time = closest_row_overall['time_local'] + 664 else: + 665 closest_row_overall = closest_date_rows.iloc[0] + 666 closest_time_time = closest_row_overall['time_local'] + 667 latitude = closest_row_overall['latitude'] + 668 longitude = closest_row_overall['longitude'] + 669 unique_id = closest_row_overall.iloc[0] + 670 RBRfilename = filename + "_gpscm" + 671 # Access the closest date components + 672 closest_date_year = closest_row_overall['year'] + 673 closest_date_month = closest_row_overall['month'] + 674 closest_date_day = closest_row_overall['day'] + 675 # Print the closest date and time + 676 print("|-ESTIMATION ALERT-|") + 677 print("Had to guess location on file: " + filename) + 678 print("Unique ID: " + unique_id) + 679 print("Closest Date (Year, Month, Day):", closest_date_year, closest_date_month, closest_date_day) + 680 print("Lat: " + str(latitude)) + 681 print("Long: " + str(longitude)) + 682 
if closest_time_time: + 683 print("Closest Time:", closest_time_time) + 684 print("====================") + 685 return latitude, longitude, RBRfilename + 686 + 687 def get_sample_location(self, rsk, filename): + 688 """ + 689 Retrieves the sample location data from the RSK file. If no location data is found, + 690 it attempts to estimate the location using the master sheet. Returns the latitude, + 691 longitude, and updated filename. 692 - 693 Returns - 694 ------- - 695 tuple - 696 A tuple containing the latitude associated with the sample, longitude associated with the sample, - 697 and the filename, adds _gps if the location was in the ruskin file, - 698 _gpscm if located via mastersheet, or _gpserror if unable to locate. - 699 """ - 700 # Adding geo data, assumes no drift and uses the first lat long in the file if there is one - 701 geo_data_length = len(list(itertools.islice(rsk.geodata(), None))) - 702 if geo_data_length < 1: - 703 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( - 704 self.mastersheet, filename) - 705 return latitude_intermediate, longitude_intermediate, filename - 706 else: - 707 for geo in itertools.islice(rsk.geodata(), None): - 708 # Is there geo data? - 709 if geo.latitude is not None: - 710 # If there is, is it from the southern ocean? - 711 if not (geo.latitude > -60): - 712 try: - 713 latitude_intermediate = geo.latitude[0] - 714 longitude_intermediate = geo.longitude[0] - 715 filename += "_gps" - 716 return latitude_intermediate, longitude_intermediate, filename - 717 except: - 718 latitude_intermediate = geo.latitude - 719 longitude_intermediate = geo.longitude - 720 filename += "_gps" - 721 return latitude_intermediate, longitude_intermediate, filename - 722 else: - 723 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( - 724 self.mastersheet, filename) - 725 return latitude_intermediate, longitude_intermediate, filename - 726 else: - 727 return None, None, filename + 'gpserror' - 728 - 729 def remove_sample_timezone_indicator(self, df): - 730 """ - 731 Removes the timezone indicator (e.g., '+00:00') from the 'timestamp' column of the - 732 given DataFrame. Returns the updated DataFrame. - 733 - 734 Parameters - 735 ---------- - 736 df : DataFrame - 737 The DataFrame to process. - 738 - 739 Returns - 740 ------- - 741 DataFrame - 742 The updated DataFrame with the timezone indicator removed. - 743 """ - 744 if self.no_values_in_object(df): - 745 return None - 746 if 'timestamp' in df.columns: - 747 df['timestamp'] = df['timestamp'].astype(str).str.split('+').str[0] - 748 return df - 749 else: - 750 return df - 751 - 752 def remove_rows_with_negative_depth(self, df): - 753 """ - 754 Removes rows from the given DataFrame where the 'depth_00' column has negative values. - 755 Returns the updated DataFrame. - 756 - 757 Parameter - 758 --------- - 759 df : DataFrame - 760 The DataFrame to process. - 761 - 762 Returns - 763 ------- - 764 DataFrame - 765 The updated DataFrame with rows containing negative depth values removed. - 766 """ - 767 if self.no_values_in_object(df): - 768 return None - 769 if 'depth_00' in df.columns: - 770 df = df[df['depth_00'] >= 0].reset_index(drop=True) - 771 else: - 772 return None - 773 if self.no_values_in_object(df): - 774 return None - 775 return df.copy() - 776 - 777 def remove_rows_with_negative_salinity(self, df): - 778 """ - 779 Removes rows from the given DataFrame where the 'salinity_00' column has negative values. - 780 Returns the updated DataFrame. 
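# The negative-value filters in this class all share one shape; a single
# column-parameterized helper expresses it (hypothetical name, same logic as
# the per-column methods):
import pandas as pd

def remove_rows_with_negative(df: pd.DataFrame, column: str):
    """Drop rows where `column` is negative; None if nothing usable remains."""
    if df is None or column not in df.columns:
        return None
    filtered = df[df[column] >= 0].reset_index(drop=True)
    return filtered.copy() if not filtered.empty else None

# e.g. remove_rows_with_negative(df, 'depth_00')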
- 781 - 782 Parameters - 783 ---------- - 784 df: DataFrame - 785 The DataFrame to process. - 786 - 787 Returns - 788 ------- - 789 DataFrame - 790 The updated DataFrame with rows containing negative salinity values removed. - 791 """ - 792 if self.no_values_in_object(df): - 793 return None - 794 if 'salinity_00' in df.columns: - 795 df = df[df['salinity_00'] >= 0].reset_index(drop=True) - 796 else: - 797 return None - 798 if self.no_values_in_object(df): - 799 return None - 800 return df.copy() - 801 - 802 def remove_rows_with_negative_pressure(self, df): - 803 """ - 804 Removes rows from the given DataFrame where the 'pressure_00' column has negative values. - 805 Returns the updated DataFrame. - 806 - 807 Parameters - 808 ---------- - 809 df: DataFrame - 810 The DataFrame to process. - 811 - 812 Returns - 813 ------- - 814 DataFrame - 815 The updated DataFrame with rows containing negative pressure values removed. - 816 """ - 817 if self.no_values_in_object(df): - 818 return None - 819 if 'pressure_00' in df.columns: - 820 df = df[df['pressure_00'] >= 0].reset_index(drop=True) - 821 else: - 822 return None - 823 if self.no_values_in_object(df): - 824 return None - 825 return df.copy() - 826 - 827 def remove_rows_with_negative_salinityabs(self, df): - 828 """ - 829 Removes rows from the given DataFrame where the 'salinityabs' column has negative values. - 830 Returns the updated DataFrame. - 831 - 832 Parameters - 833 ---------- - 834 df: DataFrame - 835 The DataFrame to process. - 836 - 837 Returns - 838 ------- - 839 DataFrame - 840 The updated DataFrame with rows containing negative absolute salinity values removed. - 841 """ - 842 if self.no_values_in_object(df): - 843 return None - 844 if 'salinityabs' in df.columns: - 845 df = df[df['salinityabs'] >= 0].reset_index(drop=True) - 846 else: - 847 return None - 848 if self.no_values_in_object(df): - 849 return None - 850 return df.copy() - 851 - 852 def remove_rows_with_negative_density(self, df): - 853 """ - 854 Removes rows from the given DataFrame where the 'density' column has negative values. - 855 Returns the updated DataFrame. - 856 - 857 Parameters - 858 ---------- - 859 df: DataFrame - 860 The DataFrame to process. - 861 - 862 Returns - 863 ------- - 864 DataFrame - 865 The updated DataFrame with rows containing negative density values removed. - 866 """ - 867 if self.no_values_in_object(df): - 868 return None - 869 if 'density' in df.columns: - 870 df = df[df['density'] >= 0].reset_index(drop=True) - 871 else: - 872 return None - 873 if self.no_values_in_object(df): - 874 return None - 875 return df.copy() - 876 - 877 - 878class Calculate: - 879 """ - 880 Calculate - 881 ---------- - 882 - 883 Class for CTD data calculations. - 884 """ - 885 - 886 @staticmethod - 887 def gsw_infunnel(SA, CT, p): - 888 """ - 889 Check if the given Absolute Salinity (SA), Conservative Temperature (CT), - 890 and pressure (p) are within the "oceanographic funnel" for the TEOS-10 75-term equation. - 891 - 892 Parameters - 893 ---------- - 894 SA : Series - 895 Absolute Salinity in g/kg. - 896 CT : Series - 897 Conservative Temperature in degrees Celsius. - 898 p : Series - 899 Sea pressure in dbar (absolute pressure minus 10.1325 dbar). - 900 - 901 Returns - 902 ------- - 903 Series of bool - 904 A boolean array where True indicates the values are inside the "oceanographic funnel". 
- 905        """
- 906        # Ensure all inputs are Series and aligned
- 907        if not (isinstance(SA, pd.Series) and isinstance(CT, pd.Series) and (
- 908                isinstance(p, pd.Series) or np.isscalar(p))):
- 909            raise CTDError("", "SA, CT, and p must be pandas Series or p a scalar")
- 910
- 911        if isinstance(p, pd.Series) and (SA.index.equals(CT.index) and SA.index.equals(p.index)) is False:
- 912            raise CTDError("", "Indices of SA, CT, and p must be aligned")
- 913
- 914        if np.isscalar(p):
- 915            p = pd.Series(p, index=SA.index)
- 916
- 917        # Define the funnel conditions
- 918        CT_freezing_p = gsw.CT_freezing(SA, p, 0)
- 919        CT_freezing_500 = gsw.CT_freezing(SA, 500, 0)
+ 693        Parameters
+ 694        ----------
+ 695        rsk : RSK
+ 696            Ruskin object of the RSK file.
+ 697        filename : str
+ 698            The filename of the RSK file.
+ 699
+ 700        Returns
+ 701        -------
+ 702        tuple
+ 703            A tuple containing the latitude associated with the sample, longitude associated with the sample,
+ 704            and the filename; adds _gps if the location was in the Ruskin file,
+ 705            _gpscm if located via mastersheet, or _gpserror if unable to locate.
+ 706        """
+ 707        # Adding geo data, assumes no drift and uses the first lat long in the file if there is one
+ 708        geo_data_length = len(list(itertools.islice(rsk.geodata(), None)))
+ 709        if geo_data_length < 1:
+ 710            latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet(
+ 711                self.mastersheet, filename)
+ 712            return latitude_intermediate, longitude_intermediate, filename
+ 713        else:
+ 714            for geo in itertools.islice(rsk.geodata(), None):
+ 715                # Is there geo data?
+ 716                if geo.latitude is not None:
+ 717                    # If there is, is it from the southern ocean?
+ 718                    if not (geo.latitude > -60):
+ 719                        try:
+ 720                            latitude_intermediate = geo.latitude[0]
+ 721                            longitude_intermediate = geo.longitude[0]
+ 722                            filename += "_gps"
+ 723                            return latitude_intermediate, longitude_intermediate, filename
+ 724                        except Exception:
+ 725                            latitude_intermediate = geo.latitude
+ 726                            longitude_intermediate = geo.longitude
+ 727                            filename += "_gps"
+ 728                            return latitude_intermediate, longitude_intermediate, filename
+ 729                    else:
+ 730                        latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet(
+ 731                            self.mastersheet, filename)
+ 732                        return latitude_intermediate, longitude_intermediate, filename
+ 733                else:
+ 734                    return None, None, filename + '_gpserror'
+ 735
+ 736    def remove_sample_timezone_indicator(self, df):
+ 737        """
+ 738        Removes the timezone indicator (e.g., '+00:00') from the 'timestamp' column of the
+ 739        given DataFrame. Returns the updated DataFrame.
+ 740
+ 741        Parameters
+ 742        ----------
+ 743        df : DataFrame
+ 744            The DataFrame to process.
+ 745
+ 746        Returns
+ 747        -------
+ 748        DataFrame
+ 749            The updated DataFrame with the timezone indicator removed.
+ 750        """
+ 751        if self.no_values_in_object(df):
+ 752            return None
+ 753        if 'timestamp' in df.columns:
+ 754            df['timestamp'] = df['timestamp'].astype(str).str.split('+').str[0]
+ 755            return df
+ 756        else:
+ 757            return df
+ 758
+ 759    def remove_rows_with_negative_depth(self, df):
+ 760        """
+ 761        Removes rows from the given DataFrame where the 'depth_00' column has negative values.
+ 762        Returns the updated DataFrame.
+ 763
+ 764        Parameters
+ 765        ----------
+ 766        df : DataFrame
+ 767            The DataFrame to process.
+ 768
+ 769        Returns
+ 770        -------
+ 771        DataFrame
+ 772            The updated DataFrame with rows containing negative depth values removed.
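For reference, the timestamp normalisation in `remove_sample_timezone_indicator` reduces to a single pandas string operation; a self-contained sketch (note it only strips `+HH:MM`-style indicators, per the '+00:00' example in the docstring):

```python
import pandas as pd

df = pd.DataFrame({"timestamp": ["2023-01-20 14:34:00+00:00"]})
# Same operation the method applies: keep everything before the '+'
df["timestamp"] = df["timestamp"].astype(str).str.split("+").str[0]
print(df["timestamp"][0])  # -> 2023-01-20 14:34:00
```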
+ 773 """ + 774 if self.no_values_in_object(df): + 775 return None + 776 if 'depth_00' in df.columns: + 777 df = df[df['depth_00'] >= 0].reset_index(drop=True) + 778 else: + 779 return None + 780 if self.no_values_in_object(df): + 781 return None + 782 return df.copy() + 783 + 784 def remove_rows_with_negative_salinity(self, df): + 785 """ + 786 Removes rows from the given DataFrame where the 'salinity_00' column has negative values. + 787 Returns the updated DataFrame. + 788 + 789 Parameters + 790 ---------- + 791 df: DataFrame + 792 The DataFrame to process. + 793 + 794 Returns + 795 ------- + 796 DataFrame + 797 The updated DataFrame with rows containing negative salinity values removed. + 798 """ + 799 if self.no_values_in_object(df): + 800 return None + 801 if 'salinity_00' in df.columns: + 802 df = df[df['salinity_00'] >= 0].reset_index(drop=True) + 803 else: + 804 return None + 805 if self.no_values_in_object(df): + 806 return None + 807 return df.copy() + 808 + 809 def remove_rows_with_negative_pressure(self, df): + 810 """ + 811 Removes rows from the given DataFrame where the 'pressure_00' column has negative values. + 812 Returns the updated DataFrame. + 813 + 814 Parameters + 815 ---------- + 816 df: DataFrame + 817 The DataFrame to process. + 818 + 819 Returns + 820 ------- + 821 DataFrame + 822 The updated DataFrame with rows containing negative pressure values removed. + 823 """ + 824 if self.no_values_in_object(df): + 825 return None + 826 if 'pressure_00' in df.columns: + 827 df = df[df['pressure_00'] >= 0].reset_index(drop=True) + 828 else: + 829 return None + 830 if self.no_values_in_object(df): + 831 return None + 832 return df.copy() + 833 + 834 def remove_rows_with_negative_salinityabs(self, df): + 835 """ + 836 Removes rows from the given DataFrame where the 'salinityabs' column has negative values. + 837 Returns the updated DataFrame. + 838 + 839 Parameters + 840 ---------- + 841 df: DataFrame + 842 The DataFrame to process. + 843 + 844 Returns + 845 ------- + 846 DataFrame + 847 The updated DataFrame with rows containing negative absolute salinity values removed. + 848 """ + 849 if self.no_values_in_object(df): + 850 return None + 851 if 'salinityabs' in df.columns: + 852 df = df[df['salinityabs'] >= 0].reset_index(drop=True) + 853 else: + 854 return None + 855 if self.no_values_in_object(df): + 856 return None + 857 return df.copy() + 858 + 859 def remove_rows_with_negative_density(self, df): + 860 """ + 861 Removes rows from the given DataFrame where the 'density' column has negative values. + 862 Returns the updated DataFrame. + 863 + 864 Parameters + 865 ---------- + 866 df: DataFrame + 867 The DataFrame to process. + 868 + 869 Returns + 870 ------- + 871 DataFrame + 872 The updated DataFrame with rows containing negative density values removed. + 873 """ + 874 if self.no_values_in_object(df): + 875 return None + 876 if 'density' in df.columns: + 877 df = df[df['density'] >= 0].reset_index(drop=True) + 878 else: + 879 return None + 880 if self.no_values_in_object(df): + 881 return None + 882 return df.copy() + 883 + 884 + 885class Calculate: + 886 """ + 887 Calculate + 888 ---------- + 889 + 890 Class for CTD data calculations. + 891 """ + 892 + 893 @staticmethod + 894 def gsw_infunnel(SA, CT, p): + 895 """ + 896 Check if the given Absolute Salinity (SA), Conservative Temperature (CT), + 897 and pressure (p) are within the "oceanographic funnel" for the TEOS-10 75-term equation. 
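A quick, hypothetical way to exercise the funnel check on hand-made samples (gsw and pandas installed; the values are arbitrary):

```python
import pandas as pd

SA = pd.Series([34.7, 35.1])   # Absolute Salinity, g/kg
CT = pd.Series([2.0, 15.0])    # Conservative Temperature, degrees Celsius
p = pd.Series([100.0, 450.0])  # sea pressure, dbar
print(Calculate.gsw_infunnel(SA, CT, p))  # element-wise booleans, True = inside the funnel
```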
+ 898 + 899 Parameters + 900 ---------- + 901 SA : Series + 902 Absolute Salinity in g/kg. + 903 CT : Series + 904 Conservative Temperature in degrees Celsius. + 905 p : Series + 906 Sea pressure in dbar (absolute pressure minus 10.1325 dbar). + 907 + 908 Returns + 909 ------- + 910 Series of bool + 911 A boolean array where True indicates the values are inside the "oceanographic funnel". + 912 """ + 913 # Ensure all inputs are Series and aligned + 914 if not (isinstance(SA, pd.Series) and isinstance(CT, pd.Series) and ( + 915 isinstance(p, pd.Series) or np.isscalar(p))): + 916 raise CTDError("", "SA, CT, and p must be pandas Series or p a scalar") + 917 + 918 if isinstance(p, pd.Series) and (SA.index.equals(CT.index) and SA.index.equals(p.index)) is False: + 919 raise CTDError("", "Indices of SA, CT, and p must be aligned") 920 - 921 in_funnel = pd.Series(True, index=SA.index) # Default all to True - 922 condition = ( - 923 (p > 8000) | - 924 (SA < 0) | (SA > 42) | - 925 ((p < 500) & (CT < CT_freezing_p)) | - 926 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | - 927 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | - 928 ((p >= 500) & (CT < CT_freezing_500)) | - 929 ((p >= 6500) & (SA < 30)) | - 930 ((p >= 6500) & (CT > 10.0)) | - 931 SA.isna() | CT.isna() | p.isna() - 932 ) - 933 in_funnel[condition] = False - 934 - 935 return in_funnel - 936 - 937 @staticmethod - 938 def calculate_and_drop_salinity_spikes(df): - 939 """ - 940 Calculates and removes salinity spikes from the CTD data based on predefined thresholds for acceptable - 941 changes in salinity with depth. - 942 - 943 Parameters - 944 ---------- - 945 df : DataFrame - 946 DataFrame containing depth and salinity data - 947 - 948 Returns - 949 ------- - 950 DataFrame - 951 DataFrame after removing salinity spikes - 952 """ - 953 acceptable_delta_salinity_per_depth = [ - 954 (0.0005, 0.001), - 955 (0.005, 0.01), - 956 (0.05, 0.1), - 957 (0.5, 1) - 958 ] - 959 if df.empty: - 960 return None - 961 # Convert 'depth_00' and 'salinity_00' to numeric, coercing errors - 962 df['depth_00'] = pd.to_numeric(df['depth_00'], errors='coerce') - 963 df['salinity_00'] = pd.to_numeric(df['salinity_00'], errors='coerce') - 964 - 965 # Drop any rows where either 'depth_00' or 'salinity_00' is NaN - 966 df = df.dropna(subset=['depth_00', 'salinity_00']) - 967 - 968 # Check if there is enough depth range to calculate - 969 min_depth = df['depth_00'].min() - 970 max_depth = df['depth_00'].max() - 971 if min_depth == max_depth: - 972 print("Insufficient depth range to calculate.") - 973 return df + 921 if np.isscalar(p): + 922 p = pd.Series(p, index=SA.index) + 923 + 924 # Define the funnel conditions + 925 CT_freezing_p = gsw.CT_freezing(SA, p, 0) + 926 CT_freezing_500 = gsw.CT_freezing(SA, 500, 0) + 927 + 928 in_funnel = pd.Series(True, index=SA.index) # Default all to True + 929 condition = ( + 930 (p > 8000) | + 931 (SA < 0) | (SA > 42) | + 932 ((p < 500) & (CT < CT_freezing_p)) | + 933 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | + 934 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | + 935 ((p >= 500) & (CT < CT_freezing_500)) | + 936 ((p >= 6500) & (SA < 30)) | + 937 ((p >= 6500) & (CT > 10.0)) | + 938 SA.isna() | CT.isna() | p.isna() + 939 ) + 940 in_funnel[condition] = False + 941 + 942 return in_funnel + 943 + 944 @staticmethod + 945 def calculate_and_drop_salinity_spikes(df): + 946 """ + 947 Calculates and removes salinity spikes from the CTD data based on 
predefined thresholds for acceptable + 948 changes in salinity with depth. + 949 + 950 Parameters + 951 ---------- + 952 df : DataFrame + 953 DataFrame containing depth and salinity data + 954 + 955 Returns + 956 ------- + 957 DataFrame + 958 DataFrame after removing salinity spikes + 959 """ + 960 acceptable_delta_salinity_per_depth = [ + 961 (0.0005, 0.001), + 962 (0.005, 0.01), + 963 (0.05, 0.1), + 964 (0.5, 1) + 965 ] + 966 if df.empty: + 967 return None + 968 # Convert 'depth_00' and 'salinity_00' to numeric, coercing errors + 969 df['depth_00'] = pd.to_numeric(df['depth_00'], errors='coerce') + 970 df['salinity_00'] = pd.to_numeric(df['salinity_00'], errors='coerce') + 971 + 972 # Drop any rows where either 'depth_00' or 'salinity_00' is NaN + 973 df = df.dropna(subset=['depth_00', 'salinity_00']) 974 - 975 def recursively_drop(df, depth_range, acceptable_delta, i): - 976 try: - 977 num_points = int((max_depth - min_depth) / depth_range) # Calculate number of points - 978 except: - 979 print("Error in calculating number of points.") - 980 return df - 981 ranges = np.linspace(min_depth, max_depth, num=num_points) - 982 - 983 # Group by these ranges - 984 groups = df.groupby(pd.cut(df['depth_00'], ranges), observed=True) - 985 - 986 # Calculate the min and max salinity for each range and filter ranges where the difference is <= 1 - 987 filtered_groups = groups.filter( - 988 lambda x: abs(x['salinity_00'].max() - x['salinity_00'].min()) <= acceptable_delta) - 989 # Get the indices of the filtered groups - 990 filtered_indices = filtered_groups.index - 991 return filtered_groups + 975 # Check if there is enough depth range to calculate + 976 min_depth = df['depth_00'].min() + 977 max_depth = df['depth_00'].max() + 978 if min_depth == max_depth: + 979 print("Insufficient depth range to calculate.") + 980 return df + 981 + 982 def recursively_drop(df, depth_range, acceptable_delta, i): + 983 try: + 984 num_points = int((max_depth - min_depth) / depth_range) # Calculate number of points + 985 except: + 986 print("Error in calculating number of points.") + 987 return df + 988 ranges = np.linspace(min_depth, max_depth, num=num_points) + 989 + 990 # Group by these ranges + 991 groups = df.groupby(pd.cut(df['depth_00'], ranges), observed=True) 992 - 993 for i, deltas in enumerate(acceptable_delta_salinity_per_depth): - 994 df = recursively_drop(df, deltas[0], deltas[1], i) - 995 return df - 996 - 997 @staticmethod - 998 def calculate_overturns(ctd_array): - 999 """ -1000 Calculates density overturns in the CTD data where denser water lies above lighter water with density -1001 difference of at least 0.05 kg/m³, which may indicate mixing or other dynamic processes. 
-1002 -1003 Parameters -1004 ---------- -1005 ctd_array : DataFrame -1006 DataFrame containing depth, density, and timestamp data -1007 -1008 Returns -1009 ------- -1010 DataFrame -1011 DataFrame with identified density overturns -1012 """ -1013 # Sort DataFrame by depth in ascending order -1014 ctd_array = ctd_array.sort_values(by='depth_00', ascending=True) -1015 # Calculate density change and identify overturns -1016 ctd_array['density_change'] = ctd_array[ -1017 'density'].diff() # Difference in density between consecutive measurements -1018 ctd_array['overturn'] = ctd_array['density_change'] < -0.05 -1019 ctd_array = ctd_array.sort_values(by='timestamp', ascending=True) -1020 if 'density_change' in ctd_array.columns: -1021 ctd_array = ctd_array.drop('density_change', axis=1) -1022 return ctd_array -1023 -1024 @staticmethod -1025 def calculate_absolute_density(ctd_array): -1026 """ -1027 Calculates absolute density from the CTD data using the TEOS-10 equations, -1028 ensuring all data points are within the valid oceanographic funnel. -1029 -1030 Parameters -1031 ---------- -1032 ctd_array : DataFrame -1033 DataFrame containing salinity, temperature, and pressure data -1034 -1035 Returns -1036 ------- -1037 Series -1038 Series with calculated absolute density -1039 """ -1040 SA = ctd_array['salinity_00'] -1041 t = ctd_array['temperature_00'] -1042 p = ctd_array['pressure_00'] -1043 CT = gsw.CT_from_t(SA, t, p) -1044 if Calculate.gsw_infunnel(SA, CT, p).all(): -1045 return gsw.density.rho_t_exact(SA, t, p) -1046 else: -1047 raise CTDError("", "Sample not in funnel, could not calculate density.") -1048 -1049 @staticmethod -1050 def calculate_absolute_salinity(ctd_array): -1051 """ -1052 Calculates absolute salinity from practical salinity, pressure, -1053 and geographical coordinates using the TEOS-10 salinity conversion formulas. -1054 -1055 Parameters -1056 ---------- -1057 ctd_array : DataFrame -1058 DataFrame containing practical salinity, pressure, longitude, and latitude data -1059 -1060 Returns -1061 ------- -1062 Series -1063 Series with calculated absolute salinity -1064 """ -1065 SP = ctd_array['salinity_00'] -1066 p = ctd_array['pressure_00'] -1067 lon = ctd_array['longitude'] -1068 lat = ctd_array['latitude'] -1069 return gsw.conversions.SA_from_SP(SP, p, lon, lat) -1070 -1071 @staticmethod -1072 def calculate_mld(densities, depths, reference_depth, delta = 0.03): -1073 """ -1074 Calculates the mixed layer depth (MLD) using the density threshold method. -1075 MLD is the depth at which the density exceeds the reference density -1076 by a predefined amount delta, which defaults to (0.03 kg/m³). + 993 # Calculate the min and max salinity for each range and filter ranges where the difference is <= 1 + 994 filtered_groups = groups.filter( + 995 lambda x: abs(x['salinity_00'].max() - x['salinity_00'].min()) <= acceptable_delta) + 996 # Get the indices of the filtered groups + 997 filtered_indices = filtered_groups.index + 998 return filtered_groups + 999 +1000 for i, deltas in enumerate(acceptable_delta_salinity_per_depth): +1001 df = recursively_drop(df, deltas[0], deltas[1], i) +1002 return df +1003 +1004 @staticmethod +1005 def calculate_overturns(ctd_array): +1006 """ +1007 Calculates density overturns in the CTD data where denser water lies above lighter water with density +1008 difference of at least 0.05 kg/m³, which may indicate mixing or other dynamic processes. 
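A minimal sketch of invoking the despiking pass above on a toy profile; the column names follow the Ruskin convention used throughout this module, and the values are made up:

```python
import pandas as pd

profile = pd.DataFrame({
    "depth_00":    [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
    "salinity_00": [33.9, 34.0, 39.5, 34.1, 34.1, 34.2],  # 39.5 looks like a spike
})
cleaned = Calculate.calculate_and_drop_salinity_spikes(profile)
# Each pass bins the profile by depth and keeps only the bins whose
# salinity range stays within that pass's tolerance.
```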
+1009 +1010 Parameters +1011 ---------- +1012 ctd_array : DataFrame +1013 DataFrame containing depth, density, and timestamp data +1014 +1015 Returns +1016 ------- +1017 DataFrame +1018 DataFrame with identified density overturns +1019 """ +1020 # Sort DataFrame by depth in ascending order +1021 ctd_array = ctd_array.sort_values(by='depth_00', ascending=True) +1022 # Calculate density change and identify overturns +1023 ctd_array['density_change'] = ctd_array[ +1024 'density'].diff() # Difference in density between consecutive measurements +1025 ctd_array['overturn'] = ctd_array['density_change'] < -0.05 +1026 ctd_array = ctd_array.sort_values(by='timestamp', ascending=True) +1027 if 'density_change' in ctd_array.columns: +1028 ctd_array = ctd_array.drop('density_change', axis=1) +1029 return ctd_array +1030 +1031 @staticmethod +1032 def calculate_absolute_density(ctd_array): +1033 """ +1034 Calculates absolute density from the CTD data using the TEOS-10 equations, +1035 ensuring all data points are within the valid oceanographic funnel. +1036 +1037 Parameters +1038 ---------- +1039 ctd_array : DataFrame +1040 DataFrame containing salinity, temperature, and pressure data +1041 +1042 Returns +1043 ------- +1044 Series +1045 Series with calculated absolute density +1046 """ +1047 SA = ctd_array['salinity_00'] +1048 t = ctd_array['temperature_00'] +1049 p = ctd_array['pressure_00'] +1050 CT = gsw.CT_from_t(SA, t, p) +1051 if Calculate.gsw_infunnel(SA, CT, p).all(): +1052 return gsw.density.rho_t_exact(SA, t, p) +1053 else: +1054 raise CTDError("", "Sample not in funnel, could not calculate density.") +1055 +1056 @staticmethod +1057 def calculate_absolute_salinity(ctd_array): +1058 """ +1059 Calculates absolute salinity from practical salinity, pressure, +1060 and geographical coordinates using the TEOS-10 salinity conversion formulas. +1061 +1062 Parameters +1063 ---------- +1064 ctd_array : DataFrame +1065 DataFrame containing practical salinity, pressure, longitude, and latitude data +1066 +1067 Returns +1068 ------- +1069 Series +1070 Series with calculated absolute salinity +1071 """ +1072 SP = ctd_array['salinity_00'] +1073 p = ctd_array['pressure_00'] +1074 lon = ctd_array['longitude'] +1075 lat = ctd_array['latitude'] +1076 return gsw.conversions.SA_from_SP(SP, p, lon, lat) 1077 -1078 Parameters -1079 ---------- -1080 densities : Series -1081 Series of densities -1082 depths : Series -1083 Series of depths corresponding to densities -1084 reference_depth : float -1085 The depth at which to anchor the reference density -1086 delta : float, optional -1087 The difference in density which would indicate the MLD, defaults to 0.03 kg/m. -1088 -1089 Returns -1090 ------- -1091 tuple -1092 A tuple containing the calculated MLD and the reference depth used to calculate MLD. 
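The absolute-salinity and density helpers above are thin wrappers over the gsw implementation of TEOS-10; stripped of the DataFrame plumbing, the underlying calls are (values arbitrary):

```python
import gsw

SP, p, lon, lat = 33.8, 50.0, -64.0, -64.8        # practical salinity, dbar, lon, lat
SA = gsw.conversions.SA_from_SP(SP, p, lon, lat)  # Absolute Salinity, g/kg
t = 1.2                                           # in-situ temperature, degrees Celsius
CT = gsw.CT_from_t(SA, t, p)                      # Conservative Temperature
rho = gsw.density.rho_t_exact(SA, t, p)           # in-situ density, kg/m³
```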
-1093 """ -1094 # Convert to numeric and ensure no NaNs remain -1095 densities = densities.apply(pd.to_numeric, errors='coerce') -1096 depths = depths.apply(pd.to_numeric, errors='coerce') -1097 densities = densities.dropna(how='any').reset_index(drop=True) -1098 depths = depths.dropna(how='any').reset_index(drop=True) -1099 reference_depth = int(reference_depth) -1100 if len(depths) == 0 or len(densities) == 0: -1101 return None -1102 sorted_data = sorted(zip(depths, densities), key=lambda x: x[0]) -1103 sorted_depths, sorted_densities = zip(*sorted_data) -1104 # Determine reference density -1105 reference_density = None -1106 for i, depth in enumerate(sorted_depths): -1107 if depth >= reference_depth: -1108 if depth == reference_depth: -1109 reference_density = sorted_densities[i] -1110 reference_depth = sorted_depths[i] -1111 else: -1112 # Linear interpolation -1113 try: -1114 reference_density = sorted_densities[i - 1] + ( -1115 (sorted_densities[i] - sorted_densities[i - 1]) * ( -1116 (reference_depth - sorted_depths[i - 1]) / -1117 (sorted_depths[i] - sorted_depths[i - 1]))) -1118 except: -1119 raise CTDError("", -1120 f"Insufficient depth range to calculate MLD. " -1121 f"Maximum sample depth is "f"{depths.max()}, minimum is {depths.min()}") -1122 break -1123 if reference_density is None: -1124 return None -1125 # Find the depth where density exceeds the reference density by more than 0.05 kg/m³ -1126 for depth, density in zip(sorted_depths, sorted_densities): -1127 if density > reference_density + delta and depth >= reference_depth: -1128 return depth, reference_depth -1129 return None # If no depth meets the criterion -1130 -1131 @staticmethod -1132 def calculate_mld_loess(densities, depths, reference_depth, delta = 0.03): -1133 """ -1134 Calculates the mixed layer depth (MLD) using LOESS smoothing to first smooth the density profile and -1135 then determine the depth where the smoothed density exceeds the reference density -1136 by a predefined amount which defaults to 0.03 kg/m³. +1078 @staticmethod +1079 def calculate_mld(densities, depths, reference_depth, delta = 0.03): +1080 """ +1081 Calculates the mixed layer depth (MLD) using the density threshold method. +1082 MLD is the depth at which the density exceeds the reference density +1083 by a predefined amount delta, which defaults to (0.03 kg/m³). +1084 +1085 Parameters +1086 ---------- +1087 densities : Series +1088 Series of densities +1089 depths : Series +1090 Series of depths corresponding to densities +1091 reference_depth : float +1092 The depth at which to anchor the reference density +1093 delta : float, optional +1094 The difference in density which would indicate the MLD, defaults to 0.03 kg/m³. +1095 +1096 Returns +1097 ------- +1098 tuple +1099 A tuple containing the calculated MLD and the reference depth used to calculate MLD. 
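A worked toy example of the threshold method (assuming the Calculate class is importable): with the reference anchored at 1 m, the MLD is the first depth whose density exceeds the 1 m density by more than 0.03 kg/m³.

```python
import pandas as pd

depths = pd.Series([1.0, 5.0, 10.0, 20.0, 30.0])
densities = pd.Series([1025.00, 1025.01, 1025.02, 1025.10, 1025.20])

mld, ref = Calculate.calculate_mld(densities, depths, reference_depth=1)
print(mld, ref)  # 20.0 1.0 -- 1025.10 is the first density above 1025.00 + 0.03
```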
+1100        """
+1101        # Convert to numeric and ensure no NaNs remain
+1102        densities = densities.apply(pd.to_numeric, errors='coerce')
+1103        depths = depths.apply(pd.to_numeric, errors='coerce')
+1104        densities = densities.dropna(how='any').reset_index(drop=True)
+1105        depths = depths.dropna(how='any').reset_index(drop=True)
+1106        reference_depth = int(reference_depth)
+1107        if len(depths) == 0 or len(densities) == 0:
+1108            return None
+1109        sorted_data = sorted(zip(depths, densities), key=lambda x: x[0])
+1110        sorted_depths, sorted_densities = zip(*sorted_data)
+1111        # Determine reference density
+1112        reference_density = None
+1113        for i, depth in enumerate(sorted_depths):
+1114            if depth >= reference_depth:
+1115                if depth == reference_depth:
+1116                    reference_density = sorted_densities[i]
+1117                    reference_depth = sorted_depths[i]
+1118                else:
+1119                    # Linear interpolation
+1120                    try:
+1121                        reference_density = sorted_densities[i - 1] + (
+1122                                (sorted_densities[i] - sorted_densities[i - 1]) * (
+1123                                (reference_depth - sorted_depths[i - 1]) /
+1124                                (sorted_depths[i] - sorted_depths[i - 1])))
+1125                    except Exception:
+1126                        raise CTDError("",
+1127                                       f"Insufficient depth range to calculate MLD. "
+1128                                       f"Maximum sample depth is {depths.max()}, minimum is {depths.min()}")
+1129                break
+1130        if reference_density is None:
+1131            return None
+1132        # Find the depth where density exceeds the reference density by more than delta (0.03 kg/m³ by default)
+1133        for depth, density in zip(sorted_depths, sorted_densities):
+1134            if density > reference_density + delta and depth >= reference_depth:
+1135                return depth, reference_depth
+1136        return None  # If no depth meets the criterion
 1137
-1138        Parameters
-1139        ----------
-1140        densities : Series
-1141            Series of densities
-1142        depths : Series
-1143            Series of depths corresponding to densities
-1144        reference_depth :
-1145            The depth at which to anchor the reference density
-1146        delta : float, optional
-1147            The difference in density which would indicate the MLD, defaults to 0.03 kg/m.
-1148
-1149        Returns
-1150        -------
-1151        tuple
-1152            A tuple containing the calculated MLD and the reference depth used to calculate MLD.
-1153        """
-1154        # Ensure input is pandas Series and drop NA values
-1155        if isinstance(densities, pd.Series) and isinstance(depths, pd.Series):
-1156            densities = densities.dropna().reset_index(drop=True)
-1157            depths = depths.dropna().reset_index(drop=True)
-1158
-1159        # Convert to numeric and ensure no NaNs remain
-1160        densities = densities.apply(pd.to_numeric, errors='coerce')
-1161        depths = depths.apply(pd.to_numeric, errors='coerce')
-1162        densities = densities.dropna().reset_index(drop=True)
-1163        depths = depths.dropna().reset_index(drop=True)
-1164        if densities.empty or depths.empty:
-1165            return None, None
-1166
-1167        # Convert pandas Series to numpy arrays for NumPy operations
-1168        densities = densities.to_numpy()
-1169        depths = depths.to_numpy()
-1170
-1171        # Remove duplicates by averaging densities at the same depth
-1172        unique_depths, indices = np.unique(depths, return_inverse=True)
-1173        average_densities = np.zeros_like(unique_depths)
-1174        np.add.at(average_densities, indices, densities)
-1175        counts = np.zeros_like(unique_depths)
-1176        np.add.at(counts, indices, 1)
-1177        average_densities /= counts
-1178
-1179        # Apply LOESS smoothing
-1180        lowess = statsmodels.api.nonparametric.lowess
-1181        smoothed = lowess(average_densities, unique_depths, frac=0.1)
-1182        smoothed_depths, smoothed_densities = zip(*smoothed)
-1183        reference_density = np.interp(reference_depth, smoothed_depths, smoothed_densities)
-1184
-1185        # Find the depth where density exceeds the reference density by more than 0.05 kg/m³
-1186        exceeding_indices = np.where(np.array(smoothed_densities) > reference_density + delta
-1187                                     and np.array(smoothed_densities) > reference_depth)[0]
-1188        if exceeding_indices.size > 0:
-1189            mld_depth = smoothed_depths[exceeding_indices[0]]
-1190            return mld_depth, reference_depth
+1138    @staticmethod
+1139    def calculate_mld_loess(densities, depths, reference_depth, delta = 0.03):
+1140        """
+1141        Calculates the mixed layer depth (MLD) using LOESS smoothing to first smooth the density profile and
+1142        then determine the depth where the smoothed density exceeds the reference density
+1143        by a predefined amount which defaults to 0.03 kg/m³.
+1144
+1145        Parameters
+1146        ----------
+1147        densities : Series
+1148            Series of densities
+1149        depths : Series
+1150            Series of depths corresponding to densities
+1151        reference_depth : float
+1152            The depth at which to anchor the reference density
+1153        delta : float, optional
+1154            The difference in density which would indicate the MLD, defaults to 0.03 kg/m³.
+1155
+1156        Returns
+1157        -------
+1158        tuple
+1159            A tuple containing the calculated MLD and the reference depth used to calculate MLD.
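The LOESS variant is intended for noisy profiles: duplicate depths are averaged, the profile is smoothed with statsmodels' lowess (frac=0.1, i.e. roughly 10% of the points per local fit), and the same delta threshold is then applied to the smoothed curve. Usage mirrors calculate_mld; a sketch reusing the toy profile from the previous example:

```python
mld, ref = Calculate.calculate_mld_loess(densities, depths, reference_depth=1)
# Returns (None, None) when no smoothed density exceeds the threshold.
```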
+1160        """
+1161        # Ensure input is pandas Series and drop NA values
+1162        if isinstance(densities, pd.Series) and isinstance(depths, pd.Series):
+1163            densities = densities.dropna().reset_index(drop=True)
+1164            depths = depths.dropna().reset_index(drop=True)
+1165
+1166        # Convert to numeric and ensure no NaNs remain
+1167        densities = densities.apply(pd.to_numeric, errors='coerce')
+1168        depths = depths.apply(pd.to_numeric, errors='coerce')
+1169        densities = densities.dropna().reset_index(drop=True)
+1170        depths = depths.dropna().reset_index(drop=True)
+1171        if densities.empty or depths.empty:
+1172            return None, None
+1173
+1174        # Convert pandas Series to numpy arrays for NumPy operations
+1175        densities = densities.to_numpy()
+1176        depths = depths.to_numpy()
+1177
+1178        # Remove duplicates by averaging densities at the same depth
+1179        unique_depths, indices = np.unique(depths, return_inverse=True)
+1180        average_densities = np.zeros_like(unique_depths)
+1181        np.add.at(average_densities, indices, densities)
+1182        counts = np.zeros_like(unique_depths)
+1183        np.add.at(counts, indices, 1)
+1184        average_densities /= counts
+1185
+1186        # Apply LOESS smoothing
+1187        lowess = statsmodels.api.nonparametric.lowess
+1188        smoothed = lowess(average_densities, unique_depths, frac=0.1)
+1189        smoothed_depths, smoothed_densities = zip(*smoothed)
+1190        reference_density = np.interp(reference_depth, smoothed_depths, smoothed_densities)
 1191
-1192        return None, None  # If no depth meets the criterion
-1193
-1194    @staticmethod
-1195    def calculate_mean_surface_density(df, range_):
-1196        """
-1197        Calculates the mean surface density from the CTD data, for a specified range or the entire dataset if the range is larger.
+1192        # Find the depth where the smoothed density exceeds the reference density by more than delta
+1193        exceeding_indices = np.where((np.array(smoothed_densities) > reference_density + delta) &
+1194                                     (np.array(smoothed_depths) >= reference_depth))[0]
+1195        if exceeding_indices.size > 0:
+1196            mld_depth = smoothed_depths[exceeding_indices[0]]
+1197            return mld_depth, reference_depth
 1198
-1199        Parameters
-1200        ----------
-1201        df : DataFrame
-1202            DataFrame containing density data.
-1203        range_ : tuple or int
-1204            Tuple indicating the (start, end) indices for the range of rows to be included in the calculation,
-1205            or an integer indicating the number of rows from the start.
-1206
-1207        Returns
-1208        -------
-1209        float, None
-1210            Mean density value of the specified sample or None if unable to calculate.
-1211        """
-1212        min_depth = df.index.min()
-1213        max_depth = df.index.max()
-1214
-1215        if isinstance(range_, tuple):
-1216            start, end = range_
-1217
-1218            # Adjust 'start' to ensure it is within the valid range
-1219            start = max(start, min_depth)
-1220
-1221            # Adjust 'end' to ensure it does not exceed the maximum depth value
-1222            end = min(end, max_depth)
-1223
-1224            # Ensure start is less than end
-1225            if start <= end:
-1226                return df.loc[start:end, 'density'].mean()
-1227            else:
-1228                return None
-1229
-1230        elif isinstance(range_, int):
-1231            # Use 'range_' as the number of rows from the start, adjust if it exceeds the DataFrame length
-1232            range_ = min(range_, len(df))
-1233            return df.iloc[:range_, df.columns.get_loc('density')].mean()
-1234
-1235        else:
-1236            raise ValueError("Invalid range type. Must be tuple or int.")
-1237
-1238
-1239class CTDError(Exception):
-1240    """
-1241    Exception raised for CTD related errors.
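`calculate_mean_surface_density` (shown above) accepts either an index tuple or a row count; a sketch of both call styles on a toy table:

```python
import pandas as pd

df = pd.DataFrame({"density": [1025.0, 1025.1, 1025.2, 1025.3]})
Calculate.calculate_mean_surface_density(df, (0, 2))  # mean over index labels 0..2
Calculate.calculate_mean_surface_density(df, 2)       # mean over the first two rows
```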
-1242 -1243 Parameters -1244 ---------- -1245 filename: input dataset which caused the error -1246 message: message -- explanation of the error -1247 """ -1248 -1249 def __init__(self, filename, message=" Unknown, check to make sure your mastersheet is in your current directory."): -1250 self.filename = filename -1251 self.message = message -1252 super().__init__(self.message) -1253 -1254 -1255def run_default(plot=False): -1256 _reset_file_environment() -1257 CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx") -1258 rsk_files_list = get_rsk_filenames_in_dir(_get_cwd()) -1259 for file in rsk_files_list: -1260 try: -1261 my_data = CTD(file) -1262 my_data.add_filename_to_table() -1263 my_data.save_to_csv("output.csv") -1264 my_data.add_location_to_table() -1265 my_data.remove_non_positive_samples() -1266 my_data.clean("practicalsalinity", 'salinitydiff') -1267 my_data.add_absolute_salinity() -1268 my_data.add_density() -1269 my_data.add_overturns() -1270 my_data.add_mld(1) -1271 my_data.add_mld(5) -1272 my_data.save_to_csv("outputclean.csv") -1273 if plot: -1274 my_data.plot_depth_density_salinity_mld_scatter() -1275 my_data.plot_depth_temperature_scatter() -1276 my_data.plot_depth_salinity_density_mld_line() -1277 except Exception as e: -1278 print(f"Error processing file: '{file}' {e}") -1279 continue -1280 -1281 -1282def merge_all_in_folder(): -1283 CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx") -1284 rsk_files_list = get_rsk_filenames_in_dir(_get_cwd()) -1285 for file in rsk_files_list: -1286 try: -1287 my_data = CTD(file) -1288 my_data.add_filename_to_table() -1289 my_data.add_location_to_table() -1290 my_data.save_to_csv("output.csv") -1291 except Exception as e: -1292 print(e) -1293 continue -1294 -1295def get_rsk_filenames_in_dir(working_directory): -1296 rsk_files_list = [] -1297 rsk_filenames_no_path = [] -1298 for filename in os.listdir(working_directory): -1299 if filename.endswith('.rsk'): -1300 for filepath in rsk_files_list: -1301 filename_no_path = ('_'.join(filepath.split("/")[-1].split("_")[0:3]).split('.rsk')[0]) -1302 if filename_no_path in rsk_filenames_no_path: -1303 continue -1304 rsk_filenames_no_path.append(filename_no_path) -1305 file_path = os.path.join(working_directory, filename) -1306 rsk_files_list.append(file_path) -1307 return rsk_files_list -1308 -1309 -1310def _get_cwd(): -1311 working_directory_path = None -1312 # determine if application is a script file or frozen exe -1313 if getattr(sys, 'frozen', False): -1314 working_directory_path = os.path.dirname(sys.executable) -1315 elif __file__: -1316 working_directory_path = os.getcwd() -1317 else: -1318 working_directory_path = os.getcwd() -1319 return working_directory_path -1320 -1321 -1322def _get_filename(filepath): -1323 return '_'.join(filepath.split("/")[-1].split("_")[0:3]).split('.rsk')[0] -1324 -1325def _reset_file_environment(): -1326 CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx") -1327 output_file_csv = "output.csv" -1328 output_file_csv_clean = "output_clean.csv" -1329 output_plots_dir = "plots" -1330 cwd = _get_cwd() -1331 CTD.master_sheet_path = os.path.join(cwd, CTD.master_sheet_path) -1332 output_file_csv = os.path.join(cwd, output_file_csv) -1333 output_file_csv_clean = os.path.join(cwd, output_file_csv_clean) -1334 if cwd is None: -1335 raise CTDError("", "Couldn't get working directory.") -1336 if os.path.isfile(output_file_csv): -1337 os.remove(output_file_csv) -1338 if 
os.path.isfile(output_file_csv_clean): -1339 os.remove(output_file_csv_clean) -1340 if os.path.isdir("./plots.gif"): -1341 os.remove("./plots/gif") -1342 if not os.path.isdir("./plots"): -1343 os.mkdir("./plots") -1344 if not os.path.isdir("./plots/gif"): -1345 os.mkdir("./plots/gif") -1346 -1347 -1348def main(): -1349 run_default(True) -1350 if len(sys.argv) < 2: -1351 print("Usage: ctdfjorder <command> [arguments]") -1352 print("Commands:") -1353 print(" process <file> Process a single RSK file") -1354 print(" merge Merge all RSK files in the current folder") -1355 print(" default Run the default processing pipeline") -1356 sys.exit(1) -1357 -1358 command = sys.argv[1] -1359 -1360 if command == "process": -1361 if len(sys.argv) < 3: -1362 print("Usage: ctdfjorder process <file>") -1363 sys.exit(1) +1199 return None, None # If no depth meets the criterion +1200 +1201 @staticmethod +1202 def calculate_mean_surface_density(df, range_): +1203 """ +1204 Calculates the mean surface density from the CTD data, for a specified range or the entire dataset if the range is larger. +1205 +1206 Parameters +1207 ---------- +1208 df : DataFrame +1209 DataFrame containing density data. +1210 range_ : tuple or int +1211 Tuple indicating the (start, end) indices for the range of rows to be included in the calculation, +1212 or an integer indicating the number of rows from the start. +1213 +1214 Returns +1215 ------- +1216 float, None +1217 Mean density value of the specified sample or None if unable to calculate. +1218 """ +1219 min_depth = df.index.min() +1220 max_depth = df.index.max() +1221 +1222 if isinstance(range_, tuple): +1223 start, end = range_ +1224 +1225 # Adjust 'start' to ensure it is within the valid range +1226 start = max(start, min_depth) +1227 +1228 # Adjust 'end' to ensure it does not exceed the maximum depth value +1229 end = min(end, max_depth) +1230 +1231 # Ensure start is less than end +1232 if start <= end: +1233 return df.loc[start:end, 'density'].mean() +1234 else: +1235 return None +1236 +1237 elif isinstance(range_, int): +1238 # Use 'range_' as the number of rows from the start, adjust if it exceeds the DataFrame length +1239 range_ = min(range_, len(df)) +1240 return df.iloc[:range_, df.columns.get_loc('density')].mean() +1241 +1242 else: +1243 raise ValueError("Invalid range type. Must be tuple or int.") +1244 +1245 +1246class CTDError(Exception): +1247 """ +1248 Exception raised for CTD related errors. 
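A short sketch of raising and handling the package's error type (the filename is a placeholder):

```python
try:
    raise CTDError("208041_20230120_1434", "No samples in file.")
except CTDError as error:
    print(f"{error.filename}: {error.message}")
```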
+1249
+1250    Parameters
+1251    ----------
+1252    filename : input dataset which caused the error
+1253    message : explanation of the error
+1254    """
+1255
+1256    def __init__(self, filename, message=" Unknown, check to make sure your mastersheet is in your current directory."):
+1257        self.filename = filename
+1258        self.message = message
+1259        super().__init__(self.message)
+1260
+1261
+1262def run_default(plot=False):
+1263    _reset_file_environment()
+1264    CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx")
+1265    rsk_files_list = get_rsk_filenames_in_dir(_get_cwd())
+1266    for file in rsk_files_list:
+1267        try:
+1268            my_data = CTD(file)
+1269            my_data.add_filename_to_table()
+1270            my_data.save_to_csv("output.csv")
+1271            my_data.add_location_to_table()
+1272            my_data.remove_non_positive_samples()
+1273            my_data.clean("practicalsalinity", 'salinitydiff')
+1274            my_data.add_absolute_salinity()
+1275            my_data.add_density()
+1276            my_data.add_overturns()
+1277            my_data.add_mld(1)
+1278            my_data.add_mld(5)
+1279            my_data.save_to_csv("outputclean.csv")
+1280            if plot:
+1281                my_data.plot_depth_density_salinity_mld_scatter()
+1282                my_data.plot_depth_temperature_scatter()
+1283                my_data.plot_depth_salinity_density_mld_line()
+1284        except Exception as e:
+1285            print(f"Error processing file: '{file}' {e}")
+1286            continue
+1287
+1288
+1289def merge_all_in_folder():
+1290    CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx")
+1291    rsk_files_list = get_rsk_filenames_in_dir(_get_cwd())
+1292    for file in rsk_files_list:
+1293        try:
+1294            my_data = CTD(file)
+1295            my_data.add_filename_to_table()
+1296            my_data.add_location_to_table()
+1297            my_data.save_to_csv("output.csv")
+1298        except Exception as e:
+1299            print(e)
+1300            continue
+1301
+1302def get_rsk_filenames_in_dir(working_directory):
+1303    rsk_files_list = []
+1304    rsk_filenames_no_path = []
+1305    for filename in os.listdir(working_directory):
+1306        if filename.endswith('.rsk'):
+1307            # Derive the base name and skip duplicates of files already collected
+1308            filename_no_path = ('_'.join(filename.split("_")[0:3]).split('.rsk')[0])
+1309            if filename_no_path in rsk_filenames_no_path:
+1310                continue
+1311            rsk_filenames_no_path.append(filename_no_path)
+1312            file_path = os.path.join(working_directory, filename)
+1313            rsk_files_list.append(file_path)
+1314    return rsk_files_list
+1315
+1316
+1317def _get_cwd():
+1318    working_directory_path = None
+1319    # determine if application is a script file or frozen exe
+1320    if getattr(sys, 'frozen', False):
+1321        working_directory_path = os.path.dirname(sys.executable)
+1322    elif __file__:
+1323        working_directory_path = os.getcwd()
+1324    else:
+1325        working_directory_path = os.getcwd()
+1326    return working_directory_path
+1327
+1328
+1329def _get_filename(filepath):
+1330    return '_'.join(filepath.split("/")[-1].split("_")[0:3]).split('.rsk')[0]
+1331
+1332def _reset_file_environment():
+1333    CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx")
+1334    output_file_csv = "output.csv"
+1335    output_file_csv_clean = "output_clean.csv"
+1336    output_plots_dir = "plots"
+1337    cwd = _get_cwd()
+1338    CTD.master_sheet_path = os.path.join(cwd, CTD.master_sheet_path)
+1339    output_file_csv = os.path.join(cwd, output_file_csv)
+1340    output_file_csv_clean = os.path.join(cwd, output_file_csv_clean)
+1341    if cwd is None:
+1342        raise CTDError("", "Couldn't get working directory.")
+1343    if os.path.isfile(output_file_csv):
+1344        os.remove(output_file_csv)
+1345    if os.path.isfile(output_file_csv_clean):
+1346        os.remove(output_file_csv_clean)
+1347    if os.path.isdir("./plots/gif") and not os.listdir("./plots/gif"):
+1348        os.rmdir("./plots/gif")  # os.rmdir can only remove an empty directory
+1349    if not os.path.isdir("./plots"):
+1350        os.mkdir("./plots")
+1351    if not os.path.isdir("./plots/gif"):
+1352        os.mkdir("./plots/gif")
+1353
+1354
+1355def main():
+1356    # run_default(True)  # likely a leftover debug call; the commands below invoke the pipeline
+1357    if len(sys.argv) < 2:
+1358        print("Usage: ctdfjorder <command> [arguments]")
+1359        print("Commands:")
+1360        print(" process <file> Process a single RSK file")
+1361        print(" merge Merge all RSK files in the current folder")
+1362        print(" default Run the default processing pipeline")
+1363        sys.exit(1)
 1364
+1365    command = sys.argv[1]
+1366
+1367    if command == "process":
+1368        if len(sys.argv) < 3:
+1369            print("Usage: ctdfjorder process <file>")
+1370            sys.exit(1)
+1371
+1372        file_path = sys.argv[2]
+1373        try:
+1374            ctd = CTD(file_path)
+1375            ctd.add_filename_to_table()
+1376            ctd.save_to_csv("output.csv")
+1377            ctd.add_location_to_table()
+1378            ctd.remove_non_positive_samples()
+1379            ctd.clean("practicalsalinity", 'salinitydiff')
+1380            ctd.add_absolute_salinity()
+1381            ctd.add_density()
+1382            ctd.add_overturns()
+1383            ctd.add_mld(0)
+1384            ctd.add_mld(10)
+1385            ctd.save_to_csv("outputclean.csv")
+1386            print("Processing completed successfully.")
+1387        except Exception as e:
+1388            print(f"Error processing file: '{file_path}' {e}")
+1389
+1390    elif command == "merge":
+1391        merge_all_in_folder()
+1392        print("Merging completed successfully.")
+1393
+1394    elif command == "default":
+1395        run_default()
+1396        print("Default processing completed successfully.")
+1397
+1398    elif command == "defaultplotall":
+1399        run_default(True)
+1400        print("Default processing completed successfully.")
+1401
+1402    else:
+1403        print(f"Unknown command: {command}")
+1404        print("Usage: ctdfjorder <command> [arguments]")
+1405        print("Commands:")
+1406        print(" process <file> Process a single RSK file")
+1407        print(" merge Merge all RSK files in the current folder")
+1408        print(" default Run the default processing pipeline")
+1409        print(" defaultplotall Run the 
default processing pipeline and create plots") +1410 print("CWD:") +1411 print(_get_cwd()) +1412 sys.exit(1) +1413 +1414if __name__ == "__main__": +1415 main() @@ -1674,820 +1687,827 @@ 60 _filename = None 61 _calculator = None 62 _cwd = None - 63 master_sheet_path = "FjordPhyto MASTER SHEET.xlsx" - 64 _NO_SAMPLES_ERROR = "No samples in file." - 65 _NO_LOCATION_ERROR = "No location could be found." - 66 _DENSITY_CALCULATION_ERROR = "Could not calculate density on this dataset." - 67 _SALINITYABS_CALCULATION_ERROR = "Could not calculate density on this dataset." - 68 _DATA_CLEANING_ERROR = "No data remains after data cleaning, reverting to previous CTD" - 69 _REMOVE_NEGATIVES_ERROR = "No data remains after removing non-positive samples." - 70 _MLD_ERROR = "No data remains after calculating MLD." - 71 - 72 def __init__(self, rskfilepath): - 73 """ - 74 Initialize a new CTD object. - 75 - 76 Parameters - 77 ---------- - 78 rskfilepath : str - 79 The file path to the RSK file. - 80 """ - 81 self._rsk = RSK(rskfilepath) - 82 self._filename = ('_'.join(rskfilepath.split("/")[-1].split("_")[0:3]).split(".rsk")[0]) - 83 print("New CTDFjorder Object Created from : " + self._filename) - 84 self._ctd_array = np.array(self._rsk.npsamples()) - 85 self._ctd_array = pd.DataFrame(self._ctd_array) - 86 self.Utility = self.Utility(self._filename) - 87 self._cwd = _get_cwd() - 88 - 89 def view_table(self): - 90 """ - 91 Print the CTD data table. - 92 """ - 93 print(tabulate(self._ctd_array, headers='keys', tablefmt='psql')) - 94 - 95 def get_pandas_df(self , copy = True): - 96 """ - 97 Exposes the dataframe of the CTD object for custom processes. - 98 - 99 Parameters -100 ---------- -101 copy : bool, optional -102 If True returns a copy, if False returns the actual DataFrame internal to the CTD object. Defaults to True. -103 -104 Returns -105 ------- -106 DataFrame -107 The pandas df of the CTD object. -108 """ -109 return self._ctd_array.copy() if copy is True else self._ctd_array -110 -111 def add_filename_to_table(self): -112 """ -113 Add the filename to the CTD data table. -114 """ -115 self._ctd_array.assign(filename=self._filename) -116 -117 def remove_timezone_indicator(self): -118 """ -119 Removes the timezone indicator in the CTD data table 'timestamp' column. -120 """ -121 self._ctd_array = self.Utility.remove_sample_timezone_indicator(self._ctd_array) -122 -123 def add_location_to_table(self): -124 """ -125 Retrieves the sample location data from the RSK file and adds it to the CTD data table. -126 If no location data is found, it attempts to estimate the location using the master sheet. -127 """ -128 location_data = self.Utility.get_sample_location(self._rsk, self._filename) -129 if self.Utility.no_values_in_object(self._ctd_array): -130 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -131 try: -132 self._ctd_array = self._ctd_array.assign(latitude=location_data[0], -133 longitude=location_data[1]) -134 except Exception: -135 self._ctd_array.loc['latitude'] = None -136 self._ctd_array.loc['longitude'] = None -137 self._ctd_array.loc['filename'] = None -138 raise CTDError(self._filename, self._NO_LOCATION_ERROR) -139 def remove_upcasts(self): -140 """ -141 Finds the global maximum depth of the sample, and filters out timestamps that occur before it. 
-142 """ -143 max_depth_index = self._ctd_array['depth_00'].idxmax() -144 max_depth_timestamp = self._ctd_array.loc[max_depth_index, 'timestamp'] -145 self._ctd_array = self._ctd_array[self._ctd_array['timestamp'] >=max_depth_timestamp] -146 -147 def remove_non_positive_samples(self): -148 """ -149 Iterates through the columns of the CTD data table and removes rows with non-positive values -150 for depth, pressure, salinity, absolute salinity, or density. -151 """ -152 if self.Utility.no_values_in_object(self._ctd_array): -153 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -154 for column in self._ctd_array.columns: -155 match column: -156 case 'depth_00': -157 self._ctd_array = self.Utility.remove_rows_with_negative_depth(self._ctd_array) -158 case 'pressure_00': -159 self._ctd_array = self.Utility.remove_rows_with_negative_pressure(self._ctd_array) -160 case 'salinity_00': -161 self._ctd_array = self.Utility.remove_rows_with_negative_salinity(self._ctd_array) -162 case 'salinityabs': -163 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) -164 case 'density': -165 self._ctd_array = self.Utility.remove_rows_with_negative_density(self._ctd_array) -166 if self.Utility.no_values_in_object(self._ctd_array): -167 raise CTDError(self._filename, self._REMOVE_NEGATIVES_ERROR) -168 -169 def clean(self, feature, method='salinitydiff'): -170 """ -171 Applies complex data cleaning methods to the specified feature based on the selected method. -172 Currently supports cleaning practical salinity using the 'salinitydiff' method. -173 -174 Parameters -175 ---------- -176 feature : str -177 The feature to clean (e.g., 'practicalsalinity'). -178 method : str, optional -179 The cleaning method to apply, defaults to 'salinitydiff'. -180 Options are 'salinitydiff'. -181 """ -182 if self.Utility.no_values_in_object(self._ctd_array): -183 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -184 supported_features = { -185 "practicalsalinity": "salinity_00" -186 } -187 supported_methods = { -188 "salinitydiff": Calculate.calculate_and_drop_salinity_spikes(self._ctd_array), -189 } -190 if feature in supported_features.keys(): -191 if method in supported_methods.keys(): -192 self._ctd_array.loc[self._ctd_array.index, 'salinity_00'] = supported_methods[method] -193 else: -194 print(f"clean: Invalid method \"{method}\" not in {supported_methods.keys()}") -195 else: -196 print(f"clean: Invalid feature \"{feature}\" not in {supported_features.keys()}.") -197 if self.Utility.no_values_in_object(self._ctd_array): -198 raise CTDError(self._filename, self._DATA_CLEANING_ERROR) -199 -200 def add_absolute_salinity(self): -201 """ -202 Calculates the absolute salinity using the TEOS-10 equations and adds it as a new column -203 to the CTD data table. Removes rows with negative absolute salinity values. -204 """ -205 if self.Utility.no_values_in_object(self._ctd_array): -206 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -207 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = Calculate.calculate_absolute_salinity( -208 self._ctd_array) -209 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) -210 if self.Utility.no_values_in_object(self._ctd_array): -211 raise CTDError(self._filename, self._SALINITYABS_CALCULATION_ERROR) -212 -213 def add_density(self): -214 """ -215 Calculates the density using the TEOS-10 equations and adds it as a new column to the CTD -216 data table. 
If absolute salinity is not present, it is calculated first.
-217        """
-218        if self.Utility.no_values_in_object(self._ctd_array):
-219            raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
-220        if 'salinityabs' in self._ctd_array.columns:
-221            self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density(
-222                self._ctd_array)
-223        else:
-224            self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = self.add_absolute_salinity()
-225            self._ctd_array = Calculate.calculate_absolute_density(self._ctd_array)
-226            self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density(
-227                self._ctd_array)
-228            self._ctd_array.drop('salinityabs')
-229        if self.Utility.no_values_in_object(self._ctd_array):
-230            raise CTDError(self._filename, self._DENSITY_CALCULATION_ERROR)
-231
-232    def add_overturns(self):
-233        """
-234        Calculates density changes between consecutive measurements and identifies overturns where
-235        denser water lies above less dense water. Adds an 'overturn' column to the CTD data table.
-236        """
-237        if self.Utility.no_values_in_object(self._ctd_array):
-238            raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
-239        self._ctd_array = Calculate.calculate_overturns(self._ctd_array.copy())
-240
-241    def add_mean_surface_density(self, start = 0.0, end = 100.0):
-242        """
-243        Calculates the mean surface density from the density values and adds it as a new column
-244        to the CTD data table.
-245
-246        Parameters
-247        ----------
-248        start : float, optional
-249            Depth bound, defaults to 0.
-250        end : float, optional
-251            Depth bound, default to 1.
-252        """
-253        if self.Utility.no_values_in_object(self._ctd_array):
-254            raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
-255        mean_surface_density = Calculate.calculate_mean_surface_density(self._ctd_array.copy(), (start, end))
-256        self._ctd_array = self._ctd_array.assign(mean_surface_density=mean_surface_density)
-257
-258    def add_mld(self, reference, method="default"):
-259        """
-260        Calculates the mixed layer depth using the specified method and reference depth.
-261        Adds the MLD and the actual reference depth used as new columns to the CTD data table.
-262
-263        Parameters
-264        ----------
-265        reference : int
-266            The reference depth for MLD calculation.
-267        method : int
-268            The MLD calculation method (default: "default").
-269        """
-270        if self.Utility.no_values_in_object(self._ctd_array):
-271            raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
-272        copy_ctd_array = self._ctd_array.copy()
-273        supported_methods = [
-274            "default"
-275        ]
-276        unpack = None
-277
-278        if method == "default":
-279            unpack = Calculate.calculate_mld(copy_ctd_array['density'], copy_ctd_array['depth_00'],
-280                                             reference)
-281        else:
-282            print(f"add_mld: Invalid method \"{method}\" not in {supported_methods}")
-283            unpack = [None, None]
-284        if unpack is None:
+ 63    _cached_master_sheet = None
+ 64    master_sheet_path = "FjordPhyto MASTER SHEET.xlsx"
+ 65    _NO_SAMPLES_ERROR = "No samples in file."
+ 66    _NO_LOCATION_ERROR = "No location could be found."
+ 67    _DENSITY_CALCULATION_ERROR = "Could not calculate density on this dataset."
+ 68    _SALINITYABS_CALCULATION_ERROR = "Could not calculate absolute salinity on this dataset."
+ 69    _DATA_CLEANING_ERROR = "No data remains after data cleaning, reverting to previous CTD"
+ 70    _REMOVE_NEGATIVES_ERROR = "No data remains after removing non-positive samples."
+ 71    _MLD_ERROR = "No data remains after calculating MLD."
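With the class constants in place, the typical per-file workflow mirrors run_default(); a sketch, with "example.rsk" as a placeholder path:

```python
ctd = CTD("example.rsk")
ctd.add_filename_to_table()
ctd.add_location_to_table()
ctd.remove_non_positive_samples()
ctd.clean("practicalsalinity", "salinitydiff")
ctd.add_absolute_salinity()
ctd.add_density()
ctd.add_overturns()
ctd.add_mld(1)  # MLD anchored at a 1 m reference depth
ctd.save_to_csv("outputclean.csv")
```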
+ 72 
+ 73 def __init__(self, rskfilepath):
+ 74 """
+ 75 Initialize a new CTD object.
+ 76 
+ 77 Parameters
+ 78 ----------
+ 79 rskfilepath : str
+ 80 The file path to the RSK file.
+ 81 """
+ 82 self._rsk = RSK(rskfilepath)
+ 83 self._filename = ('_'.join(rskfilepath.split("/")[-1].split("_")[0:3]).split(".rsk")[0])
+ 84 print("New CTDFjorder Object Created from: " + self._filename)
+ 85 self._ctd_array = np.array(self._rsk.npsamples())
+ 86 self._ctd_array = pd.DataFrame(self._ctd_array)
+ 87 self.Utility = self.Utility(self._filename)
+ 88 self._cwd = _get_cwd()
+ 89 
+ 90 def view_table(self):
+ 91 """
+ 92 Print the CTD data table.
+ 93 """
+ 94 print(tabulate(self._ctd_array, headers='keys', tablefmt='psql'))
+ 95 
+ 96 def get_pandas_df(self, copy=True):
+ 97 """
+ 98 Exposes the dataframe of the CTD object for custom processes.
+ 99 
+100 Parameters
+101 ----------
+102 copy : bool, optional
+103 If True returns a copy, if False returns the actual DataFrame internal to the CTD object. Defaults to True.
+104 
+105 Returns
+106 -------
+107 DataFrame
+108 The pandas df of the CTD object.
+109 """
+110 return self._ctd_array.copy() if copy is True else self._ctd_array
+111 
+112 def add_filename_to_table(self):
+113 """
+114 Add the filename to the CTD data table.
+115 """
+116 self._ctd_array = self._ctd_array.assign(filename=self._filename)
+117 
+118 def remove_timezone_indicator(self):
+119 """
+120 Removes the timezone indicator in the CTD data table 'timestamp' column.
+121 """
+122 self._ctd_array = self.Utility.remove_sample_timezone_indicator(self._ctd_array)
+123 
+124 def add_location_to_table(self):
+125 """
+126 Retrieves the sample location data from the RSK file and adds it to the CTD data table.
+127 If no location data is found, it attempts to estimate the location using the master sheet.
+128 """
+129 location_data = self.Utility.get_sample_location(self._rsk, self._filename)
+130 if self.Utility.no_values_in_object(self._ctd_array):
+131 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+132 try:
+133 self._ctd_array = self._ctd_array.assign(latitude=location_data[0],
+134 longitude=location_data[1],
+135 filename=location_data[2])
+136 except Exception:
+137 self._ctd_array['latitude'] = None
+138 self._ctd_array['longitude'] = None
+139 self._ctd_array['filename'] = None
+140 raise CTDError(self._filename, self._NO_LOCATION_ERROR)
+141 def remove_upcasts(self):
+142 """
+143 Finds the global maximum depth of the sample, and filters out timestamps that occur before it.
+144 """
+145 max_depth_index = self._ctd_array['depth_00'].idxmax()
+146 max_depth_timestamp = self._ctd_array.loc[max_depth_index, 'timestamp']
+147 self._ctd_array = self._ctd_array[self._ctd_array['timestamp'] >= max_depth_timestamp]
+148 
+149 def remove_non_positive_samples(self):
+150 """
+151 Iterates through the columns of the CTD data table and removes rows with non-positive values
+152 for depth, pressure, salinity, absolute salinity, or density.
+153 """
+154 if self.Utility.no_values_in_object(self._ctd_array):
+155 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+156 for column in self._ctd_array.columns:
+157 match column:
+158 case 'depth_00':
+159 self._ctd_array = self.Utility.remove_rows_with_negative_depth(self._ctd_array)
+160 case 'pressure_00':
+161 self._ctd_array = self.Utility.remove_rows_with_negative_pressure(self._ctd_array)
+162 case 'salinity_00':
+163 self._ctd_array = self.Utility.remove_rows_with_negative_salinity(self._ctd_array)
+164 case 'salinityabs':
+165 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array)
+166 case 'density':
+167 self._ctd_array = self.Utility.remove_rows_with_negative_density(self._ctd_array)
+168 if self.Utility.no_values_in_object(self._ctd_array):
+169 raise CTDError(self._filename, self._REMOVE_NEGATIVES_ERROR)
+170 
+171 def clean(self, feature, method='salinitydiff'):
+172 """
+173 Applies complex data cleaning methods to the specified feature based on the selected method.
+174 Currently supports cleaning practical salinity using the 'salinitydiff' method.
+175 
+176 Parameters
+177 ----------
+178 feature : str
+179 The feature to clean (e.g., 'practicalsalinity').
+180 method : str, optional
+181 The cleaning method to apply, defaults to 'salinitydiff'.
+182 Options are 'salinitydiff'.
+183 """
+184 if self.Utility.no_values_in_object(self._ctd_array):
+185 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+186 supported_features = {
+187 "practicalsalinity": "salinity_00"
+188 }
+189 supported_methods = {
+190 "salinitydiff": Calculate.calculate_and_drop_salinity_spikes,
+191 }
+192 if feature in supported_features.keys():
+193 if method in supported_methods.keys():
+194 self._ctd_array.loc[self._ctd_array.index, 'salinity_00'] = supported_methods[method](self._ctd_array)
+195 else:
+196 print(f"clean: Invalid method \"{method}\" not in {supported_methods.keys()}")
+197 else:
+198 print(f"clean: Invalid feature \"{feature}\" not in {supported_features.keys()}.")
+199 if self.Utility.no_values_in_object(self._ctd_array):
+200 raise CTDError(self._filename, self._DATA_CLEANING_ERROR)
+201 
+202 def add_absolute_salinity(self):
+203 """
+204 Calculates the absolute salinity using the TEOS-10 equations and adds it as a new column
+205 to the CTD data table. Removes rows with negative absolute salinity values.
+206 """
+207 if self.Utility.no_values_in_object(self._ctd_array):
+208 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+209 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = Calculate.calculate_absolute_salinity(
+210 self._ctd_array)
+211 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array)
+212 if self.Utility.no_values_in_object(self._ctd_array):
+213 raise CTDError(self._filename, self._SALINITYABS_CALCULATION_ERROR)
+214 
+215 def add_density(self):
+216 """
+217 Calculates the density using the TEOS-10 equations and adds it as a new column to the CTD
+218 data table. If absolute salinity is not present, it is calculated first.
+219 """
+220 if self.Utility.no_values_in_object(self._ctd_array):
+221 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+222 if 'salinityabs' in self._ctd_array.columns:
+223 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density(
+224 self._ctd_array)
+225 else:
+226 self.add_absolute_salinity()
+227 # add_absolute_salinity() writes the 'salinityabs' column in place and returns nothing
+228 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density(
+229 self._ctd_array)
+230 self._ctd_array = self._ctd_array.drop(columns='salinityabs')
+231 if self.Utility.no_values_in_object(self._ctd_array):
+232 raise CTDError(self._filename, self._DENSITY_CALCULATION_ERROR)
+233 
+234 def add_overturns(self):
+235 """
+236 Calculates density changes between consecutive measurements and identifies overturns where
+237 denser water lies above less dense water. Adds an 'overturn' column to the CTD data table.
+238 """
+239 if self.Utility.no_values_in_object(self._ctd_array):
+240 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+241 self._ctd_array = Calculate.calculate_overturns(self._ctd_array.copy())
+242 
+243 def add_mean_surface_density(self, start=0.0, end=100.0):
+244 """
+245 Calculates the mean surface density from the density values and adds it as a new column
+246 to the CTD data table.
+247 
+248 Parameters
+249 ----------
+250 start : float, optional
+251 Upper depth bound, defaults to 0.0.
+252 end : float, optional
+253 Lower depth bound, defaults to 100.0.
+254 """
+255 if self.Utility.no_values_in_object(self._ctd_array):
+256 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+257 mean_surface_density = Calculate.calculate_mean_surface_density(self._ctd_array.copy(), (start, end))
+258 self._ctd_array = self._ctd_array.assign(mean_surface_density=mean_surface_density)
+259 
+260 def add_mld(self, reference, method="default"):
+261 """
+262 Calculates the mixed layer depth using the specified method and reference depth.
+263 Adds the MLD and the actual reference depth used as new columns to the CTD data table.
+264 
+265 Parameters
+266 ----------
+267 reference : int
+268 The reference depth for MLD calculation.
+269 method : str
+270 The MLD calculation method (default: "default").
+271 """
+272 if self.Utility.no_values_in_object(self._ctd_array):
+273 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+274 copy_ctd_array = self._ctd_array.copy()
+275 supported_methods = [
+276 "default"
+277 ]
+278 unpack = None
+279 
+280 if method == "default":
+281 unpack = Calculate.calculate_mld(copy_ctd_array['density'], copy_ctd_array['depth_00'],
+282 reference)
+283 else:
+284 print(f"add_mld: Invalid method \"{method}\" not in {supported_methods}")
285 unpack = [None, None]
-286 raise CTDError("MLD could not be calculated.")
-287 MLD = unpack[0]
-288 depth_used_as_reference = unpack[1]
-289 self._ctd_array.loc[self._ctd_array.index, f'MLD {reference}'] = MLD
-290 self._ctd_array.loc[
-291 self._ctd_array.index, f'MLD {reference} Actual Reference Depth'] = depth_used_as_reference
-292 self._ctd_array = copy_ctd_array.merge(self._ctd_array)
-293 if self.Utility.no_values_in_object(self._ctd_array):
-294 raise CTDError(self._filename, self._MLD_ERROR)
-295 
-296 def save_to_csv(self, output_file):
-297 """
-298 Renames the columns of the CTD data table based on a predefined mapping and saves the
-299 data to the specified CSV file. If the file already exists, the data is appended to it.
-300 
-301 Parameters
-302 ----------
-303 output_file : str
-304 The output CSV file path.
-305 """
-306 rsk_labels = {
-307 "temperature_00": "Temperature (°C)",
-308 "pressure_00": "Pressure (dbar)",
-309 "chlorophyll_00": "Chlorophyll a (µg/l)",
-310 "seapressure_00": "Sea Pressure (dbar)",
-311 "depth_00": "Depth (m)",
-312 "salinity_00": "Salinity (PSU)",
-313 "speedofsound_00": "Speed of Sound (m/s)",
-314 "specificconductivity_00": "Specific Conductivity (µS/cm)",
-315 "conductivity_00": "Conductivity (mS/cm)",
-316 "density": "Density (kg/m^3) Derived",
-317 "salinityabs": "Absolute Salinity (g/kg) Derived",
-318 "MLD_Zero": "MLD Zero (m) Derived",
-319 "MLD_Ten": "MLD Ten (m) Derived",
-320 "stratification": "Stratification (J/m^2) Derived",
-321 "mean_surface_density": "Mean Surface Density (kg/m^3) Derived",
-322 "overturn": "Overturn (Δρ < -0.05)"
-323 }
-324 # Renaming columns
-325 data = self._ctd_array.copy()
-326 if 'filename' in data.columns:
-327 data = data[[col for col in data.columns if col != 'filename'] + ['filename']]
-328 for key, new_column_name in rsk_labels.items():
-329 if key in data.columns:
-330 data = data.rename(columns={key: new_column_name})
-331 data.reset_index(inplace=True, drop=True)
-332 try:
-333 csv_df = pd.read_csv(str(output_file))
-334 except FileNotFoundError:
-335 print(f"Error: The file {output_file} does not exist. A new file will be created.")
-336 csv_df = pd.DataFrame() # If file does not exist, create an empty DataFrame
-337 
-338 # Merge the existing DataFrame with the new DataFrame
-339 merged_df = pd.concat([csv_df, data], ignore_index=True)
-340 
-341 # Overwrite the original CSV file with the merged DataFrame
-342 merged_df.to_csv(output_file, index=False)
-343 
-344 return merged_df
345 
-346 def plot_depth_salinity_density_mld_line(self):
-347 """
-348 Generates a plot of depth vs. salinity and density, applying LOESS smoothing to the data.
-349 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths.
-350 Saves the plot as an image file.
+286 if unpack is None:
+287 unpack = [None, None]
+288 raise CTDError(self._filename, "MLD could not be calculated.")
+289 MLD = unpack[0]
+290 depth_used_as_reference = unpack[1]
+291 self._ctd_array.loc[self._ctd_array.index, f'MLD {reference}'] = MLD
+292 self._ctd_array.loc[
+293 self._ctd_array.index, f'MLD {reference} Actual Reference Depth'] = depth_used_as_reference
+294 self._ctd_array = copy_ctd_array.merge(self._ctd_array)
+295 if self.Utility.no_values_in_object(self._ctd_array):
+296 raise CTDError(self._filename, self._MLD_ERROR)
+297 
+298 def save_to_csv(self, output_file):
+299 """
+300 Renames the columns of the CTD data table based on a predefined mapping and saves the
+301 data to the specified CSV file. If the file already exists, the data is appended to it.
+302 
+303 Parameters
+304 ----------
+305 output_file : str
+306 The output CSV file path.
+307 """ +308 rsk_labels = { +309 "temperature_00": "Temperature (°C)", +310 "pressure_00": "Pressure (dbar)", +311 "chlorophyll_00": "Chlorophyll a (µg/l)", +312 "seapressure_00": "Sea Pressure (dbar)", +313 "depth_00": "Depth (m)", +314 "salinity_00": "Salinity (PSU)", +315 "speedofsound_00": "Speed of Sound (m/s)", +316 "specificconductivity_00": "Specific Conductivity (µS/cm)", +317 "conductivity_00": "Conductivity (mS/cm)", +318 "density": "Density (kg/m^3) Derived", +319 "salinityabs": "Absolute Salinity (g/kg) Derived", +320 "MLD_Zero": "MLD Zero (m) Derived", +321 "MLD_Ten": "MLD Ten (m) Derived", +322 "stratification": "Stratification (J/m^2) Derived", +323 "mean_surface_density": "Mean Surface Density (kg/m^3) Derived", +324 "overturn": "Overturn (Δρ < -0.05)" +325 } +326 # Renaming columns +327 data = self._ctd_array.copy() +328 if 'filename' in data.columns: +329 data = data[[col for col in data.columns if col != 'filename'] + ['filename']] +330 for key, new_column_name in rsk_labels.items(): +331 if key in data.columns: +332 data = data.rename(columns={key: new_column_name}) +333 data.reset_index(inplace=True, drop=True) +334 try: +335 csv_df = pd.read_csv(str(output_file)) +336 except FileNotFoundError: +337 print(f"Error: The file {output_file} does not exist. A new file will be created.") +338 csv_df = pd.DataFrame() # If file does not exist, create an empty DataFrame +339 +340 # Merge the existing DataFrame with the new DataFrame +341 merged_df = pd.concat([csv_df, data], ignore_index=True) +342 +343 # Overwrite the original CSV file with the merged DataFrame +344 merged_df.to_csv(output_file, index=False) 345 -346 def plot_depth_salinity_density_mld_line(self): -347 """ -348 Generates a plot of depth vs. salinity and density, applying LOESS smoothing to the data. -349 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. -350 Saves the plot as an image file. 
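A minimal end-to-end sketch of the save_to_csv flow above (hypothetical file names; only methods defined in this class are used). Because save_to_csv concatenates the new table with any existing CSV before overwriting, running it for several casts appends them all to one output file:

    ctd = CTD("066123_20221110_1316.rsk")  # placeholder RSK file path
    ctd.add_filename_to_table()
    ctd.add_location_to_table()
    ctd.remove_upcasts()
    ctd.remove_non_positive_samples()
    ctd.add_absolute_salinity()
    ctd.add_density()
    combined = ctd.save_to_csv("ctd_casts.csv")  # returns the merged DataFrame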
-351 """ -352 df = self._ctd_array.copy() -353 filename = self._filename -354 plt.rcParams.update({'font.size': 16}) -355 df_filtered = df -356 if df_filtered.isnull().values.any(): -357 df_filtered.dropna(inplace=True) # Drop rows with NaNs -358 df_filtered = df_filtered.reset_index(drop=True) -359 if len(df_filtered) < 1: -360 return -361 fig, ax1 = plt.subplots(figsize=(18, 18)) -362 ax1.invert_yaxis() -363 # Dynamically set y-axis limits based on depth data -364 max_depth = df_filtered['depth_00'].max() -365 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards -366 lowess = statsmodels.api.nonparametric.lowess -367 salinity_lowess = lowess(df_filtered['salinity_00'], df_filtered['depth_00'], frac=0.1) -368 salinity_depths, salinity_smooth = zip(*salinity_lowess) -369 color_salinity = 'tab:blue' -370 ax1.plot(salinity_smooth, salinity_depths, color=color_salinity, label='Practical Salinity') -371 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) -372 ax1.set_ylabel('Depth (m)') -373 ax1.tick_params(axis='x', labelcolor=color_salinity) -374 density_lowess = lowess(df_filtered['density'], df_filtered['depth_00'], frac=0.1) -375 density_depths, density_smooth = zip(*density_lowess) -376 ax2 = ax1.twiny() -377 color_density = 'tab:red' -378 ax2.plot(density_smooth, density_depths, color=color_density, label='Density (kg/m^3)') -379 ax2.set_xlabel('Density (kg/m^3)', color=color_density) -380 ax2.tick_params(axis='x', labelcolor=color_density) -381 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) -382 mld_cols = [] -383 for col in df.columns: -384 if 'MLD' in col and 'Actual' not in col: -385 mld_cols.append(df[col]) -386 refdepth_cols = [] -387 for col in df.columns: -388 if 'Actual' in col: -389 refdepth_cols.append(df[col]) -390 for idx, mld_col in enumerate(mld_cols): -391 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', -392 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') -393 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', -394 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) -395 if df_filtered['overturn'].any(): -396 plt.title( -397 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform " -398 f"\n THIS IS AN UNSTABLE WATER COLUMN " -399 f"\n(Higher density fluid lies above lower density fluid)") -400 else: -401 plt.title( -402 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform " -403 f"\n THIS IS AN UNSTABLE WATER COLUMN " -404 f"\n(Higher density fluid lies above lower density fluid)") -405 ax1.grid(True) -406 lines, labels = ax1.get_legend_handles_labels() -407 ax2_legend = ax2.get_legend_handles_labels() -408 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='lower center', bbox_to_anchor=(0.5, -0.15), -409 ncol=3) -410 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes_line.png") -411 plot_folder = os.path.join(self._cwd, "plots") -412 if not (os.path.isdir(plot_folder)): -413 os.mkdir(plot_folder) -414 plt.savefig(plot_path) -415 plt.close(fig) -416 -417 def plot_depth_density_salinity_mld_scatter(self): -418 """ -419 Generates a scatter plot of depth vs. salinity and density. -420 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. -421 Saves the plot as an image file. 
-422 """ -423 df = self._ctd_array.copy() -424 filename = self._filename -425 plt.rcParams.update({'font.size': 16}) -426 df_filtered = df -427 if df_filtered.empty: -428 plt.close() -429 return -430 df_filtered = df_filtered.reset_index(drop=True) -431 fig, ax1 = plt.subplots(figsize=(18, 18)) -432 ax1.invert_yaxis() -433 # Dynamically set y-axis limits based on depth data -434 max_depth = df_filtered['depth_00'].max() -435 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards -436 color_salinity = 'tab:blue' -437 ax1.scatter(df_filtered['salinity_00'], df_filtered['depth_00'], color=color_salinity, -438 label='Practical Salinity') -439 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) -440 ax1.set_ylabel('Depth (m)') -441 ax1.tick_params(axis='x', labelcolor=color_salinity) -442 ax2 = ax1.twiny() -443 color_density = 'tab:red' -444 ax2.scatter(df_filtered['density'], df_filtered['depth_00'], color=color_density, label='Density (kg/m^3)') -445 ax2.set_xlabel('Density (kg/m^3)', color=color_density) -446 ax2.tick_params(axis='x', labelcolor=color_density) -447 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) -448 mld_cols = [] -449 for col in df.columns: -450 if 'MLD' in col and 'Actual' not in col: -451 mld_cols.append(df[col]) -452 refdepth_cols = [] -453 for col in df.columns: -454 if 'Actual' in col: -455 refdepth_cols.append(df[col]) -456 for idx, mld_col in enumerate(mld_cols): -457 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', -458 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') -459 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', -460 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) -461 if df_filtered['overturn'].any(): -462 plt.title( -463 f"{filename}\n Depth vs. Salinity and Density " -464 f"\n THIS IS AN UNSTABLE WATER COLUMN " -465 f"\n(Higher density fluid lies above lower density fluid)") -466 else: -467 plt.title( -468 f"{filename}\n Depth vs. Salinity and Density " -469 f"\n THIS IS AN UNSTABLE WATER COLUMN " -470 f"\n(Higher density fluid lies above lower density fluid)") -471 ax1.grid(True) -472 lines, labels = ax1.get_legend_handles_labels() -473 ax2_legend = ax2.get_legend_handles_labels() -474 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='upper center', bbox_to_anchor=(0.5, -0.15), -475 ncol=3) -476 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes.png") -477 plot_folder = os.path.join(self._cwd, "plots") -478 if not (os.path.isdir(plot_folder)): -479 os.mkdir(plot_folder) -480 plt.savefig(plot_path) -481 plt.close(fig) -482 -483 def plot_depth_temperature_scatter(self): -484 """ -485 Generates a scatter plot of depth vs. temperature. -486 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. -487 Saves the plot as an image file. 
-488 """ -489 df = self._ctd_array.copy() -490 filename = self._filename -491 plt.rcParams.update({'font.size': 16}) -492 df_filtered = df -493 if df_filtered.empty: -494 plt.close() -495 return -496 df_filtered = df_filtered.reset_index(drop=True) -497 fig, ax1 = plt.subplots(figsize=(18, 18)) -498 ax1.invert_yaxis() -499 # Dynamically set y-axis limits based on depth data -500 max_depth = df_filtered['depth_00'].max() -501 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards -502 -503 color_temp = 'tab:blue' -504 ax1.scatter(df_filtered['temperature_00'], df_filtered['depth_00'], color=color_temp, -505 label="Temperature (°C)") -506 ax1.set_xlabel("Temperature (°C)", color=color_temp) -507 ax1.set_ylabel('Depth (m)') -508 ax1.tick_params(axis='x', labelcolor=color_temp) -509 mld_cols = [] -510 for col in df.columns: -511 if "MLD" in col and "Actual" not in col: -512 mld_cols.append(df[col]) -513 refdepth_cols = [] -514 for col in df.columns: -515 if "Reference Depth" in col: -516 refdepth_cols.append(df[col]) -517 for idx, mld_col in enumerate(mld_cols): -518 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', -519 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') -520 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', -521 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) -522 if df_filtered['overturn'].any(): -523 plt.title( -524 f"{filename}\n Depth vs. Temperature \n " -525 f"THIS IS AN UNSTABLE WATER COLUMN \n" -526 f"(Higher density fluid lies above lower density fluid)") -527 else: -528 plt.title( -529 f"{filename}\n Depth vs. Temperature \n " -530 f"THIS IS AN UNSTABLE WATER COLUMN \n" -531 f"(Higher density fluid lies above lower density fluid)") -532 ax1.grid(True) -533 lines, labels = ax1.get_legend_handles_labels() -534 ax1.legend(lines, labels, loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=3) -535 plot_path = os.path.join(self._cwd, f"plots/{filename}_temperature_depth_plot.png") -536 plot_folder = os.path.join(self._cwd, "plots") -537 if not (os.path.isdir(plot_folder)): -538 os.mkdir(plot_folder) -539 plt.savefig(plot_path) -540 plt.close(fig) -541 -542 class Utility: -543 """ -544 Utility -545 -------- -546 Utility class for CTD data processing. -547 -548 Attributes -549 ---------- -550 filename : str -551 Filename of the RSK file. -552 mastersheet : str -553 Path to the master sheet Excel file. -554 """ -555 -556 def __init__(self, filename): -557 """ -558 Initialize a new Utility object. -559 Parameters -560 ---------- -561 filename : str -562 The filename of the RSK file. -563 """ -564 self.filename = filename -565 self.mastersheet = os.path.join(_get_cwd(), CTD.master_sheet_path) -566 -567 def no_values_in_object(self, object_to_check): -568 """ -569 Checks if the given object is None, empty, or has a length greater than 0. -570 Returns True if the object has no values, False otherwise. -571 -572 Parameters -573 ---------- -574 object_to_check : object -575 The object to check for values. -576 Returns -577 -------- -578 bool -579 True if the object has no values, False otherwise. -580 """ -581 if isinstance(object_to_check, type(None)): -582 return True -583 if object_to_check.empty: +346 return merged_df +347 +348 def plot_depth_salinity_density_mld_line(self): +349 """ +350 Generates a plot of depth vs. salinity and density, applying LOESS smoothing to the data. 
+351 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths.
+352 Saves the plot as an image file.
+353 """
+354 df = self._ctd_array.copy()
+355 filename = self._filename
+356 plt.rcParams.update({'font.size': 16})
+357 df_filtered = df
+358 if df_filtered.isnull().values.any():
+359 df_filtered.dropna(inplace=True) # Drop rows with NaNs
+360 df_filtered = df_filtered.reset_index(drop=True)
+361 if len(df_filtered) < 1:
+362 return
+363 fig, ax1 = plt.subplots(figsize=(18, 18))
+364 ax1.invert_yaxis()
+365 # Dynamically set y-axis limits based on depth data
+366 max_depth = df_filtered['depth_00'].max()
+367 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards
+368 lowess = statsmodels.api.nonparametric.lowess
+369 salinity_lowess = lowess(df_filtered['salinity_00'], df_filtered['depth_00'], frac=0.1)
+370 salinity_depths, salinity_smooth = zip(*salinity_lowess)
+371 color_salinity = 'tab:blue'
+372 ax1.plot(salinity_smooth, salinity_depths, color=color_salinity, label='Practical Salinity')
+373 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity)
+374 ax1.set_ylabel('Depth (m)')
+375 ax1.tick_params(axis='x', labelcolor=color_salinity)
+376 density_lowess = lowess(df_filtered['density'], df_filtered['depth_00'], frac=0.1)
+377 density_depths, density_smooth = zip(*density_lowess)
+378 ax2 = ax1.twiny()
+379 color_density = 'tab:red'
+380 ax2.plot(density_smooth, density_depths, color=color_density, label='Density (kg/m^3)')
+381 ax2.set_xlabel('Density (kg/m^3)', color=color_density)
+382 ax2.tick_params(axis='x', labelcolor=color_density)
+383 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False))
+384 mld_cols = []
+385 for col in df.columns:
+386 if 'MLD' in col and 'Actual' not in col:
+387 mld_cols.append(df[col])
+388 refdepth_cols = []
+389 for col in df.columns:
+390 if 'Actual' in col:
+391 refdepth_cols.append(df[col])
+392 for idx, mld_col in enumerate(mld_cols):
+393 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--',
+394 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref')
+395 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center',
+396 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform())
+397 if df_filtered['overturn'].any():
+398 plt.title(
+399 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform "
+400 f"\n THIS IS AN UNSTABLE WATER COLUMN "
+401 f"\n(Higher density fluid lies above lower density fluid)")
+402 else:
+403 plt.title(
+404 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform "
+405 f"\n THIS IS A STABLE WATER COLUMN "
+406 f"\n(No higher density fluid lies above lower density fluid)")
+407 ax1.grid(True)
+408 lines, labels = ax1.get_legend_handles_labels()
+409 ax2_legend = ax2.get_legend_handles_labels()
+410 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='lower center', bbox_to_anchor=(0.5, -0.15),
+411 ncol=3)
+412 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes_line.png")
+413 plot_folder = os.path.join(self._cwd, "plots")
+414 if not (os.path.isdir(plot_folder)):
+415 os.mkdir(plot_folder)
+416 plt.savefig(plot_path)
+417 plt.close(fig)
+418 
+419 def plot_depth_density_salinity_mld_scatter(self):
+420 """
+421 Generates a scatter plot of depth vs. salinity and density.
+422 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths.
+423 Saves the plot as an image file.
+424 """
+425 df = self._ctd_array.copy()
+426 filename = self._filename
+427 plt.rcParams.update({'font.size': 16})
+428 df_filtered = df
+429 if df_filtered.empty:
+430 plt.close()
+431 return
+432 df_filtered = df_filtered.reset_index(drop=True)
+433 fig, ax1 = plt.subplots(figsize=(18, 18))
+434 ax1.invert_yaxis()
+435 # Dynamically set y-axis limits based on depth data
+436 max_depth = df_filtered['depth_00'].max()
+437 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards
+438 color_salinity = 'tab:blue'
+439 ax1.scatter(df_filtered['salinity_00'], df_filtered['depth_00'], color=color_salinity,
+440 label='Practical Salinity')
+441 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity)
+442 ax1.set_ylabel('Depth (m)')
+443 ax1.tick_params(axis='x', labelcolor=color_salinity)
+444 ax2 = ax1.twiny()
+445 color_density = 'tab:red'
+446 ax2.scatter(df_filtered['density'], df_filtered['depth_00'], color=color_density, label='Density (kg/m^3)')
+447 ax2.set_xlabel('Density (kg/m^3)', color=color_density)
+448 ax2.tick_params(axis='x', labelcolor=color_density)
+449 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False))
+450 mld_cols = []
+451 for col in df.columns:
+452 if 'MLD' in col and 'Actual' not in col:
+453 mld_cols.append(df[col])
+454 refdepth_cols = []
+455 for col in df.columns:
+456 if 'Actual' in col:
+457 refdepth_cols.append(df[col])
+458 for idx, mld_col in enumerate(mld_cols):
+459 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--',
+460 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref')
+461 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center',
+462 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform())
+463 if df_filtered['overturn'].any():
+464 plt.title(
+465 f"{filename}\n Depth vs. Salinity and Density "
+466 f"\n THIS IS AN UNSTABLE WATER COLUMN "
+467 f"\n(Higher density fluid lies above lower density fluid)")
+468 else:
+469 plt.title(
+470 f"{filename}\n Depth vs. Salinity and Density "
+471 f"\n THIS IS A STABLE WATER COLUMN "
+472 f"\n(No higher density fluid lies above lower density fluid)")
+473 ax1.grid(True)
+474 lines, labels = ax1.get_legend_handles_labels()
+475 ax2_legend = ax2.get_legend_handles_labels()
+476 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='upper center', bbox_to_anchor=(0.5, -0.15),
+477 ncol=3)
+478 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes.png")
+479 plot_folder = os.path.join(self._cwd, "plots")
+480 if not (os.path.isdir(plot_folder)):
+481 os.mkdir(plot_folder)
+482 plt.savefig(plot_path)
+483 plt.close(fig)
+484 
+485 def plot_depth_temperature_scatter(self):
+486 """
+487 Generates a scatter plot of depth vs. temperature.
+488 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths.
+489 Saves the plot as an image file.
+490 """
+491 df = self._ctd_array.copy()
+492 filename = self._filename
+493 plt.rcParams.update({'font.size': 16})
+494 df_filtered = df
+495 if df_filtered.empty:
+496 plt.close()
+497 return
+498 df_filtered = df_filtered.reset_index(drop=True)
+499 fig, ax1 = plt.subplots(figsize=(18, 18))
+500 ax1.invert_yaxis()
+501 # Dynamically set y-axis limits based on depth data
+502 max_depth = df_filtered['depth_00'].max()
+503 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards
+504 
+505 color_temp = 'tab:blue'
+506 ax1.scatter(df_filtered['temperature_00'], df_filtered['depth_00'], color=color_temp,
+507 label="Temperature (°C)")
+508 ax1.set_xlabel("Temperature (°C)", color=color_temp)
+509 ax1.set_ylabel('Depth (m)')
+510 ax1.tick_params(axis='x', labelcolor=color_temp)
+511 mld_cols = []
+512 for col in df.columns:
+513 if "MLD" in col and "Actual" not in col:
+514 mld_cols.append(df[col])
+515 refdepth_cols = []
+516 for col in df.columns:
+517 if "Reference Depth" in col:
+518 refdepth_cols.append(df[col])
+519 for idx, mld_col in enumerate(mld_cols):
+520 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--',
+521 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref')
+522 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center',
+523 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform())
+524 if df_filtered['overturn'].any():
+525 plt.title(
+526 f"{filename}\n Depth vs. Temperature \n "
+527 f"THIS IS AN UNSTABLE WATER COLUMN \n"
+528 f"(Higher density fluid lies above lower density fluid)")
+529 else:
+530 plt.title(
+531 f"{filename}\n Depth vs. Temperature \n "
+532 f"THIS IS A STABLE WATER COLUMN \n"
+533 f"(No higher density fluid lies above lower density fluid)")
+534 ax1.grid(True)
+535 lines, labels = ax1.get_legend_handles_labels()
+536 ax1.legend(lines, labels, loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=3)
+537 plot_path = os.path.join(self._cwd, f"plots/{filename}_temperature_depth_plot.png")
+538 plot_folder = os.path.join(self._cwd, "plots")
+539 if not (os.path.isdir(plot_folder)):
+540 os.mkdir(plot_folder)
+541 plt.savefig(plot_path)
+542 plt.close(fig)
+543 
+544 class Utility:
+545 """
+546 Utility
+547 --------
+548 Utility class for CTD data processing.
+549 
+550 Attributes
+551 ----------
+552 filename : str
+553 Filename of the RSK file.
+554 mastersheet : str
+555 Path to the master sheet Excel file.
+556 """
+557 
+558 def __init__(self, filename):
+559 """
+560 Initialize a new Utility object.
+561 Parameters
+562 ----------
+563 filename : str
+564 The filename of the RSK file.
+565 """
+566 self.filename = filename
+567 self.mastersheet = os.path.join(_get_cwd(), CTD.master_sheet_path)
+568 
+569 def no_values_in_object(self, object_to_check):
+570 """
+571 Checks if the given object is None, empty, or has zero length.
+572 Returns True if the object has no values, False otherwise.
+573 
+574 Parameters
+575 ----------
+576 object_to_check : object
+577 The object to check for values.
+578 Returns
+579 -------
+580 bool
+581 True if the object has no values, False otherwise.
+582 """
+583 if isinstance(object_to_check, type(None)):
584 return True
-585 if len(object_to_check) > 0:
-586 return False
-587 
-588 def process_master_sheet(self, master_sheet_path, filename):
-589 """
-590 Extracts the date and time components from the filename and compares them with the data
-591 in the master sheet.
Calculates the absolute differences between the dates and times to -592 find the closest match. Returns the estimated latitude, longitude, and updated filename -593 based on the closest match. -594 -595 Parameters -596 ---------- -597 master_sheet_path : str -598 The path to the master sheet Excel file. -599 -600 filename : str -601 The filename of the RSK file. -602 -603 Returns -604 ------- -605 tuple -606 A tuple containing the estimated latitude, longitude, and updated filename. -607 """ -608 -609 def get_date_from_string(filename): -610 try: -611 year = filename.split('_')[1][:4] -612 month = filename.split('_')[1][4:6] -613 day = filename.split('_')[1][6:] -614 hour = filename.split('_')[2][0:2] -615 minute = filename.split('_')[2][2:4] -616 time = f"{hour}:{minute}" -617 return float(year), float(month), float(day), time -618 except: -619 return None, None, None, None -620 -621 # Function to calculate the absolute difference between two dates -622 def date_difference(row, target_year, target_month, target_day): -623 return abs(row['year'] - target_year) + abs(row['month'] - target_month) + abs( -624 row['day'] - target_day) -625 -626 # Function to calculate the absolute difference between two times -627 def time_difference(target_time, df_time): -628 df_time_str = str(df_time) -629 target_hour, target_minute = [int(target_time.split(':')[0]), int(target_time.split(':')[1])] -630 try: -631 df_hour, df_minute = [int(df_time_str.split(':')[0]), int(df_time_str.split(':')[1])] -632 except: -633 return None -634 return abs((target_hour * 60 + target_minute) - (df_hour * 60 + df_minute)) -635 -636 # Load the master sheet -637 master_df = pd.read_excel(master_sheet_path) -638 # Get date and time components from the filename -639 year, month, day, time = get_date_from_string(filename) -640 if year is None: -641 return -642 # Calculate absolute differences for each row in 'master_df' -643 master_df['date_difference'] = master_df.apply(date_difference, args=(year, month, day), axis=1) -644 master_df['time_difference'] = master_df['time_local'].apply(lambda x: time_difference(time, x)) -645 # Find the rows with the smallest total difference for date -646 smallest_date_difference = master_df['date_difference'].min() -647 closest_date_rows = master_df[master_df['date_difference'] == smallest_date_difference] -648 # Check if time_difference returns None -649 if closest_date_rows['time_difference'].isnull().any(): -650 closest_time_time = None -651 closest_row_overall = closest_date_rows.iloc[0] -652 else: -653 # If there are multiple rows with the smallest date difference, select the row with the smallest time difference -654 if len(closest_date_rows) > 1: -655 closest_time_row = closest_date_rows.loc[closest_date_rows['time_difference'].idxmin()] -656 closest_row_overall = closest_time_row -657 closest_time_time = closest_row_overall['time_local'] -658 else: -659 closest_row_overall = closest_date_rows.iloc[0] -660 closest_time_time = closest_row_overall['time_local'] -661 latitude = closest_row_overall['latitude'] -662 longitude = closest_row_overall['longitude'] -663 unique_id = closest_row_overall.iloc[0] -664 RBRfilename = filename + "_gpscm" -665 # Access the closest date components -666 closest_date_year = closest_row_overall['year'] -667 closest_date_month = closest_row_overall['month'] -668 closest_date_day = closest_row_overall['day'] -669 # Print the closest date and time -670 print("|-ESTIMATION ALERT-|") -671 print("Had to guess location on file: " + filename) -672 print("Unique ID: 
" + unique_id) -673 print("Closest Date (Year, Month, Day):", closest_date_year, closest_date_month, closest_date_day) -674 print("Lat: " + str(latitude)) -675 print("Long: " + str(longitude)) -676 if closest_time_time: -677 print("Closest Time:", closest_time_time) -678 print("====================") -679 return latitude, longitude, RBRfilename -680 -681 def get_sample_location(self, rsk, filename): -682 """ -683 Retrieves the sample location data from the RSK file. If no location data is found, -684 it attempts to estimate the location using the master sheet. Returns the latitude, -685 longitude, and updated filename. -686 -687 Parameters -688 ---------- -689 rsk : RSK -690 Ruskin object of the RSK file. -691 filename : str -692 The filename of the RSK file. +585 if object_to_check.empty: +586 return True +587 if len(object_to_check) > 0: +588 return False +589 +590 def process_master_sheet(self, master_sheet_path, filename): +591 """ +592 Extracts the date and time components from the filename and compares them with the data +593 in the master sheet. Calculates the absolute differences between the dates and times to +594 find the closest match. Returns the estimated latitude, longitude, and updated filename +595 based on the closest match. +596 +597 Parameters +598 ---------- +599 master_sheet_path : str +600 The path to the master sheet Excel file. +601 +602 filename : str +603 The filename of the RSK file. +604 +605 Returns +606 ------- +607 tuple +608 A tuple containing the estimated latitude, longitude, and updated filename. +609 """ +610 +611 def get_date_from_string(filename): +612 try: +613 year = filename.split('_')[1][:4] +614 month = filename.split('_')[1][4:6] +615 day = filename.split('_')[1][6:] +616 hour = filename.split('_')[2][0:2] +617 minute = filename.split('_')[2][2:4] +618 time = f"{hour}:{minute}" +619 return float(year), float(month), float(day), time +620 except: +621 return None, None, None, None +622 +623 # Function to calculate the absolute difference between two dates +624 def date_difference(row, target_year, target_month, target_day): +625 return abs(row['year'] - target_year) + abs(row['month'] - target_month) + abs( +626 row['day'] - target_day) +627 +628 # Function to calculate the absolute difference between two times +629 def time_difference(target_time, df_time): +630 df_time_str = str(df_time) +631 target_hour, target_minute = [int(target_time.split(':')[0]), int(target_time.split(':')[1])] +632 try: +633 df_hour, df_minute = [int(df_time_str.split(':')[0]), int(df_time_str.split(':')[1])] +634 except: +635 return None +636 return abs((target_hour * 60 + target_minute) - (df_hour * 60 + df_minute)) +637 +638 # Check if the master sheet is already cached +639 if CTD._cached_master_sheet is None: +640 # Load the master sheet and cache it +641 CTD._cached_master_sheet = pd.read_excel(master_sheet_path) +642 +643 # Use the cached master sheet data +644 master_df = CTD._cached_master_sheet.copy() +645 # Get date and time components from the filename +646 year, month, day, time = get_date_from_string(filename) +647 if year is None: +648 return +649 # Calculate absolute differences for each row in 'master_df' +650 master_df['date_difference'] = master_df.apply(date_difference, args=(year, month, day), axis=1) +651 master_df['time_difference'] = master_df['time_local'].apply(lambda x: time_difference(time, x)) +652 # Find the rows with the smallest total difference for date +653 smallest_date_difference = master_df['date_difference'].min() +654 
closest_date_rows = master_df[master_df['date_difference'] == smallest_date_difference] +655 # Check if time_difference returns None +656 if closest_date_rows['time_difference'].isnull().any(): +657 closest_time_time = None +658 closest_row_overall = closest_date_rows.iloc[0] +659 else: +660 # If there are multiple rows with the smallest date difference, select the row with the smallest time difference +661 if len(closest_date_rows) > 1: +662 closest_time_row = closest_date_rows.loc[closest_date_rows['time_difference'].idxmin()] +663 closest_row_overall = closest_time_row +664 closest_time_time = closest_row_overall['time_local'] +665 else: +666 closest_row_overall = closest_date_rows.iloc[0] +667 closest_time_time = closest_row_overall['time_local'] +668 latitude = closest_row_overall['latitude'] +669 longitude = closest_row_overall['longitude'] +670 unique_id = closest_row_overall.iloc[0] +671 RBRfilename = filename + "_gpscm" +672 # Access the closest date components +673 closest_date_year = closest_row_overall['year'] +674 closest_date_month = closest_row_overall['month'] +675 closest_date_day = closest_row_overall['day'] +676 # Print the closest date and time +677 print("|-ESTIMATION ALERT-|") +678 print("Had to guess location on file: " + filename) +679 print("Unique ID: " + unique_id) +680 print("Closest Date (Year, Month, Day):", closest_date_year, closest_date_month, closest_date_day) +681 print("Lat: " + str(latitude)) +682 print("Long: " + str(longitude)) +683 if closest_time_time: +684 print("Closest Time:", closest_time_time) +685 print("====================") +686 return latitude, longitude, RBRfilename +687 +688 def get_sample_location(self, rsk, filename): +689 """ +690 Retrieves the sample location data from the RSK file. If no location data is found, +691 it attempts to estimate the location using the master sheet. Returns the latitude, +692 longitude, and updated filename. 693 -694 Returns -695 ------- -696 tuple -697 A tuple containing the latitude associated with the sample, longitude associated with the sample, -698 and the filename, adds _gps if the location was in the ruskin file, -699 _gpscm if located via mastersheet, or _gpserror if unable to locate. -700 """ -701 # Adding geo data, assumes no drift and uses the first lat long in the file if there is one -702 geo_data_length = len(list(itertools.islice(rsk.geodata(), None))) -703 if geo_data_length < 1: -704 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( -705 self.mastersheet, filename) -706 return latitude_intermediate, longitude_intermediate, filename -707 else: -708 for geo in itertools.islice(rsk.geodata(), None): -709 # Is there geo data? -710 if geo.latitude is not None: -711 # If there is, is it from the southern ocean? 
-712 if not (geo.latitude > -60): -713 try: -714 latitude_intermediate = geo.latitude[0] -715 longitude_intermediate = geo.longitude[0] -716 filename += "_gps" -717 return latitude_intermediate, longitude_intermediate, filename -718 except: -719 latitude_intermediate = geo.latitude -720 longitude_intermediate = geo.longitude -721 filename += "_gps" -722 return latitude_intermediate, longitude_intermediate, filename -723 else: -724 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( -725 self.mastersheet, filename) -726 return latitude_intermediate, longitude_intermediate, filename -727 else: -728 return None, None, filename + 'gpserror' -729 -730 def remove_sample_timezone_indicator(self, df): -731 """ -732 Removes the timezone indicator (e.g., '+00:00') from the 'timestamp' column of the -733 given DataFrame. Returns the updated DataFrame. -734 -735 Parameters -736 ---------- -737 df : DataFrame -738 The DataFrame to process. -739 -740 Returns -741 ------- -742 DataFrame -743 The updated DataFrame with the timezone indicator removed. -744 """ -745 if self.no_values_in_object(df): -746 return None -747 if 'timestamp' in df.columns: -748 df['timestamp'] = df['timestamp'].astype(str).str.split('+').str[0] -749 return df -750 else: -751 return df -752 -753 def remove_rows_with_negative_depth(self, df): -754 """ -755 Removes rows from the given DataFrame where the 'depth_00' column has negative values. -756 Returns the updated DataFrame. -757 -758 Parameter -759 --------- -760 df : DataFrame -761 The DataFrame to process. -762 -763 Returns -764 ------- -765 DataFrame -766 The updated DataFrame with rows containing negative depth values removed. -767 """ -768 if self.no_values_in_object(df): -769 return None -770 if 'depth_00' in df.columns: -771 df = df[df['depth_00'] >= 0].reset_index(drop=True) -772 else: -773 return None -774 if self.no_values_in_object(df): -775 return None -776 return df.copy() -777 -778 def remove_rows_with_negative_salinity(self, df): -779 """ -780 Removes rows from the given DataFrame where the 'salinity_00' column has negative values. -781 Returns the updated DataFrame. -782 -783 Parameters -784 ---------- -785 df: DataFrame -786 The DataFrame to process. -787 -788 Returns -789 ------- -790 DataFrame -791 The updated DataFrame with rows containing negative salinity values removed. -792 """ -793 if self.no_values_in_object(df): -794 return None -795 if 'salinity_00' in df.columns: -796 df = df[df['salinity_00'] >= 0].reset_index(drop=True) -797 else: -798 return None -799 if self.no_values_in_object(df): -800 return None -801 return df.copy() -802 -803 def remove_rows_with_negative_pressure(self, df): -804 """ -805 Removes rows from the given DataFrame where the 'pressure_00' column has negative values. -806 Returns the updated DataFrame. -807 -808 Parameters -809 ---------- -810 df: DataFrame -811 The DataFrame to process. -812 -813 Returns -814 ------- -815 DataFrame -816 The updated DataFrame with rows containing negative pressure values removed. -817 """ -818 if self.no_values_in_object(df): -819 return None -820 if 'pressure_00' in df.columns: -821 df = df[df['pressure_00'] >= 0].reset_index(drop=True) -822 else: -823 return None -824 if self.no_values_in_object(df): -825 return None -826 return df.copy() -827 -828 def remove_rows_with_negative_salinityabs(self, df): -829 """ -830 Removes rows from the given DataFrame where the 'salinityabs' column has negative values. -831 Returns the updated DataFrame. 
-832 -833 Parameters -834 ---------- -835 df: DataFrame -836 The DataFrame to process. -837 -838 Returns -839 ------- -840 DataFrame -841 The updated DataFrame with rows containing negative absolute salinity values removed. -842 """ -843 if self.no_values_in_object(df): -844 return None -845 if 'salinityabs' in df.columns: -846 df = df[df['salinityabs'] >= 0].reset_index(drop=True) -847 else: -848 return None -849 if self.no_values_in_object(df): -850 return None -851 return df.copy() -852 -853 def remove_rows_with_negative_density(self, df): -854 """ -855 Removes rows from the given DataFrame where the 'density' column has negative values. -856 Returns the updated DataFrame. -857 -858 Parameters -859 ---------- -860 df: DataFrame -861 The DataFrame to process. -862 -863 Returns -864 ------- -865 DataFrame -866 The updated DataFrame with rows containing negative density values removed. -867 """ -868 if self.no_values_in_object(df): -869 return None -870 if 'density' in df.columns: -871 df = df[df['density'] >= 0].reset_index(drop=True) -872 else: -873 return None -874 if self.no_values_in_object(df): -875 return None -876 return df.copy() +694 Parameters +695 ---------- +696 rsk : RSK +697 Ruskin object of the RSK file. +698 filename : str +699 The filename of the RSK file. +700 +701 Returns +702 ------- +703 tuple +704 A tuple containing the latitude associated with the sample, longitude associated with the sample, +705 and the filename, adds _gps if the location was in the ruskin file, +706 _gpscm if located via mastersheet, or _gpserror if unable to locate. +707 """ +708 # Adding geo data, assumes no drift and uses the first lat long in the file if there is one +709 geo_data_length = len(list(itertools.islice(rsk.geodata(), None))) +710 if geo_data_length < 1: +711 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( +712 self.mastersheet, filename) +713 return latitude_intermediate, longitude_intermediate, filename +714 else: +715 for geo in itertools.islice(rsk.geodata(), None): +716 # Is there geo data? +717 if geo.latitude is not None: +718 # If there is, is it from the southern ocean? +719 if not (geo.latitude > -60): +720 try: +721 latitude_intermediate = geo.latitude[0] +722 longitude_intermediate = geo.longitude[0] +723 filename += "_gps" +724 return latitude_intermediate, longitude_intermediate, filename +725 except: +726 latitude_intermediate = geo.latitude +727 longitude_intermediate = geo.longitude +728 filename += "_gps" +729 return latitude_intermediate, longitude_intermediate, filename +730 else: +731 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( +732 self.mastersheet, filename) +733 return latitude_intermediate, longitude_intermediate, filename +734 else: +735 return None, None, filename + 'gpserror' +736 +737 def remove_sample_timezone_indicator(self, df): +738 """ +739 Removes the timezone indicator (e.g., '+00:00') from the 'timestamp' column of the +740 given DataFrame. Returns the updated DataFrame. +741 +742 Parameters +743 ---------- +744 df : DataFrame +745 The DataFrame to process. +746 +747 Returns +748 ------- +749 DataFrame +750 The updated DataFrame with the timezone indicator removed. 
+751 """ +752 if self.no_values_in_object(df): +753 return None +754 if 'timestamp' in df.columns: +755 df['timestamp'] = df['timestamp'].astype(str).str.split('+').str[0] +756 return df +757 else: +758 return df +759 +760 def remove_rows_with_negative_depth(self, df): +761 """ +762 Removes rows from the given DataFrame where the 'depth_00' column has negative values. +763 Returns the updated DataFrame. +764 +765 Parameter +766 --------- +767 df : DataFrame +768 The DataFrame to process. +769 +770 Returns +771 ------- +772 DataFrame +773 The updated DataFrame with rows containing negative depth values removed. +774 """ +775 if self.no_values_in_object(df): +776 return None +777 if 'depth_00' in df.columns: +778 df = df[df['depth_00'] >= 0].reset_index(drop=True) +779 else: +780 return None +781 if self.no_values_in_object(df): +782 return None +783 return df.copy() +784 +785 def remove_rows_with_negative_salinity(self, df): +786 """ +787 Removes rows from the given DataFrame where the 'salinity_00' column has negative values. +788 Returns the updated DataFrame. +789 +790 Parameters +791 ---------- +792 df: DataFrame +793 The DataFrame to process. +794 +795 Returns +796 ------- +797 DataFrame +798 The updated DataFrame with rows containing negative salinity values removed. +799 """ +800 if self.no_values_in_object(df): +801 return None +802 if 'salinity_00' in df.columns: +803 df = df[df['salinity_00'] >= 0].reset_index(drop=True) +804 else: +805 return None +806 if self.no_values_in_object(df): +807 return None +808 return df.copy() +809 +810 def remove_rows_with_negative_pressure(self, df): +811 """ +812 Removes rows from the given DataFrame where the 'pressure_00' column has negative values. +813 Returns the updated DataFrame. +814 +815 Parameters +816 ---------- +817 df: DataFrame +818 The DataFrame to process. +819 +820 Returns +821 ------- +822 DataFrame +823 The updated DataFrame with rows containing negative pressure values removed. +824 """ +825 if self.no_values_in_object(df): +826 return None +827 if 'pressure_00' in df.columns: +828 df = df[df['pressure_00'] >= 0].reset_index(drop=True) +829 else: +830 return None +831 if self.no_values_in_object(df): +832 return None +833 return df.copy() +834 +835 def remove_rows_with_negative_salinityabs(self, df): +836 """ +837 Removes rows from the given DataFrame where the 'salinityabs' column has negative values. +838 Returns the updated DataFrame. +839 +840 Parameters +841 ---------- +842 df: DataFrame +843 The DataFrame to process. +844 +845 Returns +846 ------- +847 DataFrame +848 The updated DataFrame with rows containing negative absolute salinity values removed. +849 """ +850 if self.no_values_in_object(df): +851 return None +852 if 'salinityabs' in df.columns: +853 df = df[df['salinityabs'] >= 0].reset_index(drop=True) +854 else: +855 return None +856 if self.no_values_in_object(df): +857 return None +858 return df.copy() +859 +860 def remove_rows_with_negative_density(self, df): +861 """ +862 Removes rows from the given DataFrame where the 'density' column has negative values. +863 Returns the updated DataFrame. +864 +865 Parameters +866 ---------- +867 df: DataFrame +868 The DataFrame to process. +869 +870 Returns +871 ------- +872 DataFrame +873 The updated DataFrame with rows containing negative density values removed. 
+874 """
+875 if self.no_values_in_object(df):
+876 return None
+877 if 'density' in df.columns:
+878 df = df[df['density'] >= 0].reset_index(drop=True)
+879 else:
+880 return None
+881 if self.no_values_in_object(df):
+882 return None
+883 return df.copy()
@@ -2548,22 +2568,22 @@ Notes
- 72 def __init__(self, rskfilepath):
-73 """
-74 Initialize a new CTD object.
-75 
-76 Parameters
-77 ----------
-78 rskfilepath : str
-79 The file path to the RSK file.
-80 """
-81 self._rsk = RSK(rskfilepath)
-82 self._filename = ('_'.join(rskfilepath.split("/")[-1].split("_")[0:3]).split(".rsk")[0])
-83 print("New CTDFjorder Object Created from : " + self._filename)
-84 self._ctd_array = np.array(self._rsk.npsamples())
-85 self._ctd_array = pd.DataFrame(self._ctd_array)
-86 self.Utility = self.Utility(self._filename)
-87 self._cwd = _get_cwd()
+ 73 def __init__(self, rskfilepath):
+74 """
+75 Initialize a new CTD object.
+76 
+77 Parameters
+78 ----------
+79 rskfilepath : str
+80 The file path to the RSK file.
+81 """
+82 self._rsk = RSK(rskfilepath)
+83 self._filename = ('_'.join(rskfilepath.split("/")[-1].split("_")[0:3]).split(".rsk")[0])
+84 print("New CTDFjorder Object Created from: " + self._filename)
+85 self._ctd_array = np.array(self._rsk.npsamples())
+86 self._ctd_array = pd.DataFrame(self._ctd_array)
+87 self.Utility = self.Utility(self._filename)
+88 self._cwd = _get_cwd()
@@ -2602,11 +2622,11 @@ Parameters
- 89 def view_table(self):
-90 """
-91 Print the CTD data table.
-92 """
-93 print(tabulate(self._ctd_array, headers='keys', tablefmt='psql'))
+ 90 def view_table(self):
+91 """
+92 Print the CTD data table.
+93 """
+94 print(tabulate(self._ctd_array, headers='keys', tablefmt='psql'))
@@ -2626,21 +2646,21 @@ Parameters
- 95 def get_pandas_df(self , copy = True):
- 96 """
- 97 Exposes the dataframe of the CTD object for custom processes.
- 98 
- 99 Parameters
-100 ----------
-101 copy : bool, optional
-102 If True returns a copy, if False returns the actual DataFrame internal to the CTD object. Defaults to True.
-103 
-104 Returns
-105 -------
-106 DataFrame
-107 The pandas df of the CTD object.
-108 """
-109 return self._ctd_array.copy() if copy is True else self._ctd_array
+ 96 def get_pandas_df(self, copy=True):
+ 97 """
+ 98 Exposes the dataframe of the CTD object for custom processes.
+ 99 
+100 Parameters
+101 ----------
+102 copy : bool, optional
+103 If True returns a copy, if False returns the actual DataFrame internal to the CTD object. Defaults to True.
+104 
+105 Returns
+106 -------
+107 DataFrame
+108 The pandas df of the CTD object.
+109 """
+110 return self._ctd_array.copy() if copy is True else self._ctd_array
@@ -2673,11 +2693,11 @@ Returns
- 111 def add_filename_to_table(self):
-112 """
-113 Add the filename to the CTD data table.
-114 """
-115 self._ctd_array.assign(filename=self._filename)
+ 112 def add_filename_to_table(self):
+113 """
+114 Add the filename to the CTD data table.
+115 """
+116 self._ctd_array = self._ctd_array.assign(filename=self._filename)
@@ -2697,11 +2717,11 @@ Returns
- 117 def remove_timezone_indicator(self):
-118 """
-119 Removes the timezone indicator in the CTD data table 'timestamp' column.
-120 """
-121 self._ctd_array = self.Utility.remove_sample_timezone_indicator(self._ctd_array)
+ 118 def remove_timezone_indicator(self):
+119 """
+120 Removes the timezone indicator in the CTD data table 'timestamp' column.
+121 """ +122 self._ctd_array = self.Utility.remove_sample_timezone_indicator(self._ctd_array) @@ -2721,22 +2741,23 @@ Returns - 123 def add_location_to_table(self): -124 """ -125 Retrieves the sample location data from the RSK file and adds it to the CTD data table. -126 If no location data is found, it attempts to estimate the location using the master sheet. -127 """ -128 location_data = self.Utility.get_sample_location(self._rsk, self._filename) -129 if self.Utility.no_values_in_object(self._ctd_array): -130 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -131 try: -132 self._ctd_array = self._ctd_array.assign(latitude=location_data[0], -133 longitude=location_data[1]) -134 except Exception: -135 self._ctd_array.loc['latitude'] = None -136 self._ctd_array.loc['longitude'] = None -137 self._ctd_array.loc['filename'] = None -138 raise CTDError(self._filename, self._NO_LOCATION_ERROR) + 124 def add_location_to_table(self): +125 """ +126 Retrieves the sample location data from the RSK file and adds it to the CTD data table. +127 If no location data is found, it attempts to estimate the location using the master sheet. +128 """ +129 location_data = self.Utility.get_sample_location(self._rsk, self._filename) +130 if self.Utility.no_values_in_object(self._ctd_array): +131 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +132 try: +133 self._ctd_array = self._ctd_array.assign(latitude=location_data[0], +134 longitude=location_data[1], +135 filename=location_data[2]) +136 except Exception: +137 self._ctd_array.loc['latitude'] = None +138 self._ctd_array.loc['longitude'] = None +139 self._ctd_array.loc['filename'] = None +140 raise CTDError(self._filename, self._NO_LOCATION_ERROR) @@ -2757,13 +2778,13 @@ Returns - 139 def remove_upcasts(self): -140 """ -141 Finds the global maximum depth of the sample, and filters out timestamps that occur before it. -142 """ -143 max_depth_index = self._ctd_array['depth_00'].idxmax() -144 max_depth_timestamp = self._ctd_array.loc[max_depth_index, 'timestamp'] -145 self._ctd_array = self._ctd_array[self._ctd_array['timestamp'] >=max_depth_timestamp] + 141 def remove_upcasts(self): +142 """ +143 Finds the global maximum depth of the sample, and filters out timestamps that occur before it. +144 """ +145 max_depth_index = self._ctd_array['depth_00'].idxmax() +146 max_depth_timestamp = self._ctd_array.loc[max_depth_index, 'timestamp'] +147 self._ctd_array = self._ctd_array[self._ctd_array['timestamp'] >=max_depth_timestamp] @@ -2783,27 +2804,27 @@ Returns - 147 def remove_non_positive_samples(self): -148 """ -149 Iterates through the columns of the CTD data table and removes rows with non-positive values -150 for depth, pressure, salinity, absolute salinity, or density. 
-151 """ -152 if self.Utility.no_values_in_object(self._ctd_array): -153 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -154 for column in self._ctd_array.columns: -155 match column: -156 case 'depth_00': -157 self._ctd_array = self.Utility.remove_rows_with_negative_depth(self._ctd_array) -158 case 'pressure_00': -159 self._ctd_array = self.Utility.remove_rows_with_negative_pressure(self._ctd_array) -160 case 'salinity_00': -161 self._ctd_array = self.Utility.remove_rows_with_negative_salinity(self._ctd_array) -162 case 'salinityabs': -163 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) -164 case 'density': -165 self._ctd_array = self.Utility.remove_rows_with_negative_density(self._ctd_array) -166 if self.Utility.no_values_in_object(self._ctd_array): -167 raise CTDError(self._filename, self._REMOVE_NEGATIVES_ERROR) + 149 def remove_non_positive_samples(self): +150 """ +151 Iterates through the columns of the CTD data table and removes rows with non-positive values +152 for depth, pressure, salinity, absolute salinity, or density. +153 """ +154 if self.Utility.no_values_in_object(self._ctd_array): +155 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +156 for column in self._ctd_array.columns: +157 match column: +158 case 'depth_00': +159 self._ctd_array = self.Utility.remove_rows_with_negative_depth(self._ctd_array) +160 case 'pressure_00': +161 self._ctd_array = self.Utility.remove_rows_with_negative_pressure(self._ctd_array) +162 case 'salinity_00': +163 self._ctd_array = self.Utility.remove_rows_with_negative_salinity(self._ctd_array) +164 case 'salinityabs': +165 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) +166 case 'density': +167 self._ctd_array = self.Utility.remove_rows_with_negative_density(self._ctd_array) +168 if self.Utility.no_values_in_object(self._ctd_array): +169 raise CTDError(self._filename, self._REMOVE_NEGATIVES_ERROR) @@ -2824,36 +2845,36 @@ Returns - 169 def clean(self, feature, method='salinitydiff'): -170 """ -171 Applies complex data cleaning methods to the specified feature based on the selected method. -172 Currently supports cleaning practical salinity using the 'salinitydiff' method. -173 -174 Parameters -175 ---------- -176 feature : str -177 The feature to clean (e.g., 'practicalsalinity'). -178 method : str, optional -179 The cleaning method to apply, defaults to 'salinitydiff'. -180 Options are 'salinitydiff'. -181 """ -182 if self.Utility.no_values_in_object(self._ctd_array): -183 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -184 supported_features = { -185 "practicalsalinity": "salinity_00" -186 } -187 supported_methods = { -188 "salinitydiff": Calculate.calculate_and_drop_salinity_spikes(self._ctd_array), -189 } -190 if feature in supported_features.keys(): -191 if method in supported_methods.keys(): -192 self._ctd_array.loc[self._ctd_array.index, 'salinity_00'] = supported_methods[method] -193 else: -194 print(f"clean: Invalid method \"{method}\" not in {supported_methods.keys()}") -195 else: -196 print(f"clean: Invalid feature \"{feature}\" not in {supported_features.keys()}.") -197 if self.Utility.no_values_in_object(self._ctd_array): -198 raise CTDError(self._filename, self._DATA_CLEANING_ERROR) + 171 def clean(self, feature, method='salinitydiff'): +172 """ +173 Applies complex data cleaning methods to the specified feature based on the selected method. +174 Currently supports cleaning practical salinity using the 'salinitydiff' method. 
+175 +176 Parameters +177 ---------- +178 feature : str +179 The feature to clean (e.g., 'practicalsalinity'). +180 method : str, optional +181 The cleaning method to apply, defaults to 'salinitydiff'. +182 Options are 'salinitydiff'. +183 """ +184 if self.Utility.no_values_in_object(self._ctd_array): +185 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +186 supported_features = { +187 "practicalsalinity": "salinity_00" +188 } +189 supported_methods = { +190 "salinitydiff": Calculate.calculate_and_drop_salinity_spikes(self._ctd_array), +191 } +192 if feature in supported_features.keys(): +193 if method in supported_methods.keys(): +194 self._ctd_array.loc[self._ctd_array.index, 'salinity_00'] = supported_methods[method] +195 else: +196 print(f"clean: Invalid method \"{method}\" not in {supported_methods.keys()}") +197 else: +198 print(f"clean: Invalid feature \"{feature}\" not in {supported_features.keys()}.") +199 if self.Utility.no_values_in_object(self._ctd_array): +200 raise CTDError(self._filename, self._DATA_CLEANING_ERROR) @@ -2884,18 +2905,18 @@ Parameters - 200 def add_absolute_salinity(self): -201 """ -202 Calculates the absolute salinity using the TEOS-10 equations and adds it as a new column -203 to the CTD data table. Removes rows with negative absolute salinity values. -204 """ -205 if self.Utility.no_values_in_object(self._ctd_array): -206 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -207 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = Calculate.calculate_absolute_salinity( -208 self._ctd_array) -209 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) -210 if self.Utility.no_values_in_object(self._ctd_array): -211 raise CTDError(self._filename, self._SALINITYABS_CALCULATION_ERROR) + 202 def add_absolute_salinity(self): +203 """ +204 Calculates the absolute salinity using the TEOS-10 equations and adds it as a new column +205 to the CTD data table. Removes rows with negative absolute salinity values. +206 """ +207 if self.Utility.no_values_in_object(self._ctd_array): +208 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +209 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = Calculate.calculate_absolute_salinity( +210 self._ctd_array) +211 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) +212 if self.Utility.no_values_in_object(self._ctd_array): +213 raise CTDError(self._filename, self._SALINITYABS_CALCULATION_ERROR) @@ -2916,24 +2937,24 @@ Parameters - 213 def add_density(self): -214 """ -215 Calculates the density using the TEOS-10 equations and adds it as a new column to the CTD -216 data table. If absolute salinity is not present, it is calculated first. 
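add_absolute_salinity() above delegates the TEOS-10 conversion to Calculate.calculate_absolute_salinity, whose internals are not shown in this file. The standard route with the gsw (Gibbs SeaWater) toolbox looks like this; the sample values are illustrative only:

```python
import gsw  # TEOS-10 Gibbs SeaWater toolbox

# Practical salinity (PSU), sea pressure (dbar), longitude, latitude
SA = gsw.SA_from_SP(34.5, 100.0, -64.0, -64.8)  # absolute salinity in g/kg
print(f"{SA:.3f} g/kg")
```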
-217 """ -218 if self.Utility.no_values_in_object(self._ctd_array): -219 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -220 if 'salinityabs' in self._ctd_array.columns: -221 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density( -222 self._ctd_array) -223 else: -224 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = self.add_absolute_salinity() -225 self._ctd_array = Calculate.calculate_absolute_density(self._ctd_array) -226 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density( -227 self._ctd_array) -228 self._ctd_array.drop('salinityabs') -229 if self.Utility.no_values_in_object(self._ctd_array): -230 raise CTDError(self._filename, self._DENSITY_CALCULATION_ERROR) + 215 def add_density(self): +216 """ +217 Calculates the density using the TEOS-10 equations and adds it as a new column to the CTD +218 data table. If absolute salinity is not present, it is calculated first. +219 """ +220 if self.Utility.no_values_in_object(self._ctd_array): +221 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +222 if 'salinityabs' in self._ctd_array.columns: +223 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density( +224 self._ctd_array) +225 else: +226 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = self.add_absolute_salinity() +227 self._ctd_array = Calculate.calculate_absolute_density(self._ctd_array) +228 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density( +229 self._ctd_array) +230 self._ctd_array.drop('salinityabs') +231 if self.Utility.no_values_in_object(self._ctd_array): +232 raise CTDError(self._filename, self._DENSITY_CALCULATION_ERROR) @@ -2954,14 +2975,14 @@ Parameters - 232 def add_overturns(self): -233 """ -234 Calculates density changes between consecutive measurements and identifies overturns where -235 denser water lies above less dense water. Adds an 'overturn' column to the CTD data table. -236 """ -237 if self.Utility.no_values_in_object(self._ctd_array): -238 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -239 self._ctd_array = Calculate.calculate_overturns(self._ctd_array.copy()) + 234 def add_overturns(self): +235 """ +236 Calculates density changes between consecutive measurements and identifies overturns where +237 denser water lies above less dense water. Adds an 'overturn' column to the CTD data table. +238 """ +239 if self.Utility.no_values_in_object(self._ctd_array): +240 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +241 self._ctd_array = Calculate.calculate_overturns(self._ctd_array.copy()) @@ -2982,22 +3003,22 @@ Parameters - 241 def add_mean_surface_density(self, start = 0.0, end = 100.0): -242 """ -243 Calculates the mean surface density from the density values and adds it as a new column -244 to the CTD data table. -245 -246 Parameters -247 ---------- -248 start : float, optional -249 Depth bound, defaults to 0. -250 end : float, optional -251 Depth bound, default to 1. 
-252 """ -253 if self.Utility.no_values_in_object(self._ctd_array): -254 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -255 mean_surface_density = Calculate.calculate_mean_surface_density(self._ctd_array.copy(), (start, end)) -256 self._ctd_array = self._ctd_array.assign(mean_surface_density=mean_surface_density) + 243 def add_mean_surface_density(self, start = 0.0, end = 100.0): +244 """ +245 Calculates the mean surface density from the density values and adds it as a new column +246 to the CTD data table. +247 +248 Parameters +249 ---------- +250 start : float, optional +251 Depth bound, defaults to 0. +252 end : float, optional +253 Depth bound, default to 1. +254 """ +255 if self.Utility.no_values_in_object(self._ctd_array): +256 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +257 mean_surface_density = Calculate.calculate_mean_surface_density(self._ctd_array.copy(), (start, end)) +258 self._ctd_array = self._ctd_array.assign(mean_surface_density=mean_surface_density) @@ -3027,43 +3048,43 @@ Parameters - 258 def add_mld(self, reference, method="default"): -259 """ -260 Calculates the mixed layer depth using the specified method and reference depth. -261 Adds the MLD and the actual reference depth used as new columns to the CTD data table. -262 -263 Parameters -264 ---------- -265 reference : int -266 The reference depth for MLD calculation. -267 method : int -268 The MLD calculation method (default: "default"). -269 """ -270 if self.Utility.no_values_in_object(self._ctd_array): -271 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -272 copy_ctd_array = self._ctd_array.copy() -273 supported_methods = [ -274 "default" -275 ] -276 unpack = None -277 -278 if method == "default": -279 unpack = Calculate.calculate_mld(copy_ctd_array['density'], copy_ctd_array['depth_00'], -280 reference) -281 else: -282 print(f"add_mld: Invalid method \"{method}\" not in {supported_methods}") -283 unpack = [None, None] -284 if unpack is None: + 260 def add_mld(self, reference, method="default"): +261 """ +262 Calculates the mixed layer depth using the specified method and reference depth. +263 Adds the MLD and the actual reference depth used as new columns to the CTD data table. +264 +265 Parameters +266 ---------- +267 reference : int +268 The reference depth for MLD calculation. +269 method : int +270 The MLD calculation method (default: "default"). 
+271 """ +272 if self.Utility.no_values_in_object(self._ctd_array): +273 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +274 copy_ctd_array = self._ctd_array.copy() +275 supported_methods = [ +276 "default" +277 ] +278 unpack = None +279 +280 if method == "default": +281 unpack = Calculate.calculate_mld(copy_ctd_array['density'], copy_ctd_array['depth_00'], +282 reference) +283 else: +284 print(f"add_mld: Invalid method \"{method}\" not in {supported_methods}") 285 unpack = [None, None] -286 raise CTDError("MLD could not be calculated.") -287 MLD = unpack[0] -288 depth_used_as_reference = unpack[1] -289 self._ctd_array.loc[self._ctd_array.index, f'MLD {reference}'] = MLD -290 self._ctd_array.loc[ -291 self._ctd_array.index, f'MLD {reference} Actual Reference Depth'] = depth_used_as_reference -292 self._ctd_array = copy_ctd_array.merge(self._ctd_array) -293 if self.Utility.no_values_in_object(self._ctd_array): -294 raise CTDError(self._filename, self._MLD_ERROR) +286 if unpack is None: +287 unpack = [None, None] +288 raise CTDError("MLD could not be calculated.") +289 MLD = unpack[0] +290 depth_used_as_reference = unpack[1] +291 self._ctd_array.loc[self._ctd_array.index, f'MLD {reference}'] = MLD +292 self._ctd_array.loc[ +293 self._ctd_array.index, f'MLD {reference} Actual Reference Depth'] = depth_used_as_reference +294 self._ctd_array = copy_ctd_array.merge(self._ctd_array) +295 if self.Utility.no_values_in_object(self._ctd_array): +296 raise CTDError(self._filename, self._MLD_ERROR) @@ -3093,55 +3114,55 @@ Parameters - 296 def save_to_csv(self, output_file): -297 """ -298 Renames the columns of the CTD data table based on a predefined mapping and saves the -299 data to the specified CSV file. If the file already exists, the data is appended to it. -300 -301 Parameters -302 ---------- -303 output_file : str -304 The output CSV file path. -305 """ -306 rsk_labels = { -307 "temperature_00": "Temperature (°C)", -308 "pressure_00": "Pressure (dbar)", -309 "chlorophyll_00": "Chlorophyll a (µg/l)", -310 "seapressure_00": "Sea Pressure (dbar)", -311 "depth_00": "Depth (m)", -312 "salinity_00": "Salinity (PSU)", -313 "speedofsound_00": "Speed of Sound (m/s)", -314 "specificconductivity_00": "Specific Conductivity (µS/cm)", -315 "conductivity_00": "Conductivity (mS/cm)", -316 "density": "Density (kg/m^3) Derived", -317 "salinityabs": "Absolute Salinity (g/kg) Derived", -318 "MLD_Zero": "MLD Zero (m) Derived", -319 "MLD_Ten": "MLD Ten (m) Derived", -320 "stratification": "Stratification (J/m^2) Derived", -321 "mean_surface_density": "Mean Surface Density (kg/m^3) Derived", -322 "overturn": "Overturn (Δρ < -0.05)" -323 } -324 # Renaming columns -325 data = self._ctd_array.copy() -326 if 'filename' in data.columns: -327 data = data[[col for col in data.columns if col != 'filename'] + ['filename']] -328 for key, new_column_name in rsk_labels.items(): -329 if key in data.columns: -330 data = data.rename(columns={key: new_column_name}) -331 data.reset_index(inplace=True, drop=True) -332 try: -333 csv_df = pd.read_csv(str(output_file)) -334 except FileNotFoundError: -335 print(f"Error: The file {output_file} does not exist. 
A new file will be created.") -336 csv_df = pd.DataFrame() # If file does not exist, create an empty DataFrame -337 -338 # Merge the existing DataFrame with the new DataFrame -339 merged_df = pd.concat([csv_df, data], ignore_index=True) -340 -341 # Overwrite the original CSV file with the merged DataFrame -342 merged_df.to_csv(output_file, index=False) -343 -344 return merged_df + 298 def save_to_csv(self, output_file): +299 """ +300 Renames the columns of the CTD data table based on a predefined mapping and saves the +301 data to the specified CSV file. If the file already exists, the data is appended to it. +302 +303 Parameters +304 ---------- +305 output_file : str +306 The output CSV file path. +307 """ +308 rsk_labels = { +309 "temperature_00": "Temperature (°C)", +310 "pressure_00": "Pressure (dbar)", +311 "chlorophyll_00": "Chlorophyll a (µg/l)", +312 "seapressure_00": "Sea Pressure (dbar)", +313 "depth_00": "Depth (m)", +314 "salinity_00": "Salinity (PSU)", +315 "speedofsound_00": "Speed of Sound (m/s)", +316 "specificconductivity_00": "Specific Conductivity (µS/cm)", +317 "conductivity_00": "Conductivity (mS/cm)", +318 "density": "Density (kg/m^3) Derived", +319 "salinityabs": "Absolute Salinity (g/kg) Derived", +320 "MLD_Zero": "MLD Zero (m) Derived", +321 "MLD_Ten": "MLD Ten (m) Derived", +322 "stratification": "Stratification (J/m^2) Derived", +323 "mean_surface_density": "Mean Surface Density (kg/m^3) Derived", +324 "overturn": "Overturn (Δρ < -0.05)" +325 } +326 # Renaming columns +327 data = self._ctd_array.copy() +328 if 'filename' in data.columns: +329 data = data[[col for col in data.columns if col != 'filename'] + ['filename']] +330 for key, new_column_name in rsk_labels.items(): +331 if key in data.columns: +332 data = data.rename(columns={key: new_column_name}) +333 data.reset_index(inplace=True, drop=True) +334 try: +335 csv_df = pd.read_csv(str(output_file)) +336 except FileNotFoundError: +337 print(f"Error: The file {output_file} does not exist. A new file will be created.") +338 csv_df = pd.DataFrame() # If file does not exist, create an empty DataFrame +339 +340 # Merge the existing DataFrame with the new DataFrame +341 merged_df = pd.concat([csv_df, data], ignore_index=True) +342 +343 # Overwrite the original CSV file with the merged DataFrame +344 merged_df.to_csv(output_file, index=False) +345 +346 return merged_df @@ -3169,76 +3190,76 @@ Parameters - 346 def plot_depth_salinity_density_mld_line(self): -347 """ -348 Generates a plot of depth vs. salinity and density, applying LOESS smoothing to the data. -349 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. -350 Saves the plot as an image file. 
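The read-concatenate-overwrite pattern save_to_csv() uses can be reproduced stand-alone; the file name and columns below are hypothetical:

```python
import pandas as pd

new_cast = pd.DataFrame({"Depth (m)": [1.0, 2.0], "Salinity (PSU)": [33.1, 33.4]})
try:
    existing = pd.read_csv("ctd_output.csv")
except FileNotFoundError:
    existing = pd.DataFrame()              # first cast: start with an empty table
merged = pd.concat([existing, new_cast], ignore_index=True)
merged.to_csv("ctd_output.csv", index=False)
```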
-351 """ -352 df = self._ctd_array.copy() -353 filename = self._filename -354 plt.rcParams.update({'font.size': 16}) -355 df_filtered = df -356 if df_filtered.isnull().values.any(): -357 df_filtered.dropna(inplace=True) # Drop rows with NaNs -358 df_filtered = df_filtered.reset_index(drop=True) -359 if len(df_filtered) < 1: -360 return -361 fig, ax1 = plt.subplots(figsize=(18, 18)) -362 ax1.invert_yaxis() -363 # Dynamically set y-axis limits based on depth data -364 max_depth = df_filtered['depth_00'].max() -365 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards -366 lowess = statsmodels.api.nonparametric.lowess -367 salinity_lowess = lowess(df_filtered['salinity_00'], df_filtered['depth_00'], frac=0.1) -368 salinity_depths, salinity_smooth = zip(*salinity_lowess) -369 color_salinity = 'tab:blue' -370 ax1.plot(salinity_smooth, salinity_depths, color=color_salinity, label='Practical Salinity') -371 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) -372 ax1.set_ylabel('Depth (m)') -373 ax1.tick_params(axis='x', labelcolor=color_salinity) -374 density_lowess = lowess(df_filtered['density'], df_filtered['depth_00'], frac=0.1) -375 density_depths, density_smooth = zip(*density_lowess) -376 ax2 = ax1.twiny() -377 color_density = 'tab:red' -378 ax2.plot(density_smooth, density_depths, color=color_density, label='Density (kg/m^3)') -379 ax2.set_xlabel('Density (kg/m^3)', color=color_density) -380 ax2.tick_params(axis='x', labelcolor=color_density) -381 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) -382 mld_cols = [] -383 for col in df.columns: -384 if 'MLD' in col and 'Actual' not in col: -385 mld_cols.append(df[col]) -386 refdepth_cols = [] -387 for col in df.columns: -388 if 'Actual' in col: -389 refdepth_cols.append(df[col]) -390 for idx, mld_col in enumerate(mld_cols): -391 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', -392 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') -393 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', -394 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) -395 if df_filtered['overturn'].any(): -396 plt.title( -397 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform " -398 f"\n THIS IS AN UNSTABLE WATER COLUMN " -399 f"\n(Higher density fluid lies above lower density fluid)") -400 else: -401 plt.title( -402 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform " -403 f"\n THIS IS AN UNSTABLE WATER COLUMN " -404 f"\n(Higher density fluid lies above lower density fluid)") -405 ax1.grid(True) -406 lines, labels = ax1.get_legend_handles_labels() -407 ax2_legend = ax2.get_legend_handles_labels() -408 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='lower center', bbox_to_anchor=(0.5, -0.15), -409 ncol=3) -410 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes_line.png") -411 plot_folder = os.path.join(self._cwd, "plots") -412 if not (os.path.isdir(plot_folder)): -413 os.mkdir(plot_folder) -414 plt.savefig(plot_path) -415 plt.close(fig) + 348 def plot_depth_salinity_density_mld_line(self): +349 """ +350 Generates a plot of depth vs. salinity and density, applying LOESS smoothing to the data. +351 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. +352 Saves the plot as an image file. 
+353 """ +354 df = self._ctd_array.copy() +355 filename = self._filename +356 plt.rcParams.update({'font.size': 16}) +357 df_filtered = df +358 if df_filtered.isnull().values.any(): +359 df_filtered.dropna(inplace=True) # Drop rows with NaNs +360 df_filtered = df_filtered.reset_index(drop=True) +361 if len(df_filtered) < 1: +362 return +363 fig, ax1 = plt.subplots(figsize=(18, 18)) +364 ax1.invert_yaxis() +365 # Dynamically set y-axis limits based on depth data +366 max_depth = df_filtered['depth_00'].max() +367 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards +368 lowess = statsmodels.api.nonparametric.lowess +369 salinity_lowess = lowess(df_filtered['salinity_00'], df_filtered['depth_00'], frac=0.1) +370 salinity_depths, salinity_smooth = zip(*salinity_lowess) +371 color_salinity = 'tab:blue' +372 ax1.plot(salinity_smooth, salinity_depths, color=color_salinity, label='Practical Salinity') +373 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) +374 ax1.set_ylabel('Depth (m)') +375 ax1.tick_params(axis='x', labelcolor=color_salinity) +376 density_lowess = lowess(df_filtered['density'], df_filtered['depth_00'], frac=0.1) +377 density_depths, density_smooth = zip(*density_lowess) +378 ax2 = ax1.twiny() +379 color_density = 'tab:red' +380 ax2.plot(density_smooth, density_depths, color=color_density, label='Density (kg/m^3)') +381 ax2.set_xlabel('Density (kg/m^3)', color=color_density) +382 ax2.tick_params(axis='x', labelcolor=color_density) +383 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) +384 mld_cols = [] +385 for col in df.columns: +386 if 'MLD' in col and 'Actual' not in col: +387 mld_cols.append(df[col]) +388 refdepth_cols = [] +389 for col in df.columns: +390 if 'Actual' in col: +391 refdepth_cols.append(df[col]) +392 for idx, mld_col in enumerate(mld_cols): +393 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', +394 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') +395 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', +396 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) +397 if df_filtered['overturn'].any(): +398 plt.title( +399 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform " +400 f"\n THIS IS AN UNSTABLE WATER COLUMN " +401 f"\n(Higher density fluid lies above lower density fluid)") +402 else: +403 plt.title( +404 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform " +405 f"\n THIS IS AN UNSTABLE WATER COLUMN " +406 f"\n(Higher density fluid lies above lower density fluid)") +407 ax1.grid(True) +408 lines, labels = ax1.get_legend_handles_labels() +409 ax2_legend = ax2.get_legend_handles_labels() +410 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='lower center', bbox_to_anchor=(0.5, -0.15), +411 ncol=3) +412 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes_line.png") +413 plot_folder = os.path.join(self._cwd, "plots") +414 if not (os.path.isdir(plot_folder)): +415 os.mkdir(plot_folder) +416 plt.savefig(plot_path) +417 plt.close(fig) @@ -3260,71 +3281,71 @@ Parameters - 417 def plot_depth_density_salinity_mld_scatter(self): -418 """ -419 Generates a scatter plot of depth vs. salinity and density. -420 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. -421 Saves the plot as an image file. 
-422 """ -423 df = self._ctd_array.copy() -424 filename = self._filename -425 plt.rcParams.update({'font.size': 16}) -426 df_filtered = df -427 if df_filtered.empty: -428 plt.close() -429 return -430 df_filtered = df_filtered.reset_index(drop=True) -431 fig, ax1 = plt.subplots(figsize=(18, 18)) -432 ax1.invert_yaxis() -433 # Dynamically set y-axis limits based on depth data -434 max_depth = df_filtered['depth_00'].max() -435 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards -436 color_salinity = 'tab:blue' -437 ax1.scatter(df_filtered['salinity_00'], df_filtered['depth_00'], color=color_salinity, -438 label='Practical Salinity') -439 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) -440 ax1.set_ylabel('Depth (m)') -441 ax1.tick_params(axis='x', labelcolor=color_salinity) -442 ax2 = ax1.twiny() -443 color_density = 'tab:red' -444 ax2.scatter(df_filtered['density'], df_filtered['depth_00'], color=color_density, label='Density (kg/m^3)') -445 ax2.set_xlabel('Density (kg/m^3)', color=color_density) -446 ax2.tick_params(axis='x', labelcolor=color_density) -447 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) -448 mld_cols = [] -449 for col in df.columns: -450 if 'MLD' in col and 'Actual' not in col: -451 mld_cols.append(df[col]) -452 refdepth_cols = [] -453 for col in df.columns: -454 if 'Actual' in col: -455 refdepth_cols.append(df[col]) -456 for idx, mld_col in enumerate(mld_cols): -457 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', -458 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') -459 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', -460 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) -461 if df_filtered['overturn'].any(): -462 plt.title( -463 f"{filename}\n Depth vs. Salinity and Density " -464 f"\n THIS IS AN UNSTABLE WATER COLUMN " -465 f"\n(Higher density fluid lies above lower density fluid)") -466 else: -467 plt.title( -468 f"{filename}\n Depth vs. Salinity and Density " -469 f"\n THIS IS AN UNSTABLE WATER COLUMN " -470 f"\n(Higher density fluid lies above lower density fluid)") -471 ax1.grid(True) -472 lines, labels = ax1.get_legend_handles_labels() -473 ax2_legend = ax2.get_legend_handles_labels() -474 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='upper center', bbox_to_anchor=(0.5, -0.15), -475 ncol=3) -476 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes.png") -477 plot_folder = os.path.join(self._cwd, "plots") -478 if not (os.path.isdir(plot_folder)): -479 os.mkdir(plot_folder) -480 plt.savefig(plot_path) -481 plt.close(fig) + 419 def plot_depth_density_salinity_mld_scatter(self): +420 """ +421 Generates a scatter plot of depth vs. salinity and density. +422 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. +423 Saves the plot as an image file. 
+424 """ +425 df = self._ctd_array.copy() +426 filename = self._filename +427 plt.rcParams.update({'font.size': 16}) +428 df_filtered = df +429 if df_filtered.empty: +430 plt.close() +431 return +432 df_filtered = df_filtered.reset_index(drop=True) +433 fig, ax1 = plt.subplots(figsize=(18, 18)) +434 ax1.invert_yaxis() +435 # Dynamically set y-axis limits based on depth data +436 max_depth = df_filtered['depth_00'].max() +437 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards +438 color_salinity = 'tab:blue' +439 ax1.scatter(df_filtered['salinity_00'], df_filtered['depth_00'], color=color_salinity, +440 label='Practical Salinity') +441 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) +442 ax1.set_ylabel('Depth (m)') +443 ax1.tick_params(axis='x', labelcolor=color_salinity) +444 ax2 = ax1.twiny() +445 color_density = 'tab:red' +446 ax2.scatter(df_filtered['density'], df_filtered['depth_00'], color=color_density, label='Density (kg/m^3)') +447 ax2.set_xlabel('Density (kg/m^3)', color=color_density) +448 ax2.tick_params(axis='x', labelcolor=color_density) +449 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) +450 mld_cols = [] +451 for col in df.columns: +452 if 'MLD' in col and 'Actual' not in col: +453 mld_cols.append(df[col]) +454 refdepth_cols = [] +455 for col in df.columns: +456 if 'Actual' in col: +457 refdepth_cols.append(df[col]) +458 for idx, mld_col in enumerate(mld_cols): +459 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', +460 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') +461 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', +462 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) +463 if df_filtered['overturn'].any(): +464 plt.title( +465 f"{filename}\n Depth vs. Salinity and Density " +466 f"\n THIS IS AN UNSTABLE WATER COLUMN " +467 f"\n(Higher density fluid lies above lower density fluid)") +468 else: +469 plt.title( +470 f"{filename}\n Depth vs. Salinity and Density " +471 f"\n THIS IS AN UNSTABLE WATER COLUMN " +472 f"\n(Higher density fluid lies above lower density fluid)") +473 ax1.grid(True) +474 lines, labels = ax1.get_legend_handles_labels() +475 ax2_legend = ax2.get_legend_handles_labels() +476 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='upper center', bbox_to_anchor=(0.5, -0.15), +477 ncol=3) +478 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes.png") +479 plot_folder = os.path.join(self._cwd, "plots") +480 if not (os.path.isdir(plot_folder)): +481 os.mkdir(plot_folder) +482 plt.savefig(plot_path) +483 plt.close(fig) @@ -3346,64 +3367,64 @@ Parameters - 483 def plot_depth_temperature_scatter(self): -484 """ -485 Generates a scatter plot of depth vs. temperature. -486 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. -487 Saves the plot as an image file. 
-488 """ -489 df = self._ctd_array.copy() -490 filename = self._filename -491 plt.rcParams.update({'font.size': 16}) -492 df_filtered = df -493 if df_filtered.empty: -494 plt.close() -495 return -496 df_filtered = df_filtered.reset_index(drop=True) -497 fig, ax1 = plt.subplots(figsize=(18, 18)) -498 ax1.invert_yaxis() -499 # Dynamically set y-axis limits based on depth data -500 max_depth = df_filtered['depth_00'].max() -501 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards -502 -503 color_temp = 'tab:blue' -504 ax1.scatter(df_filtered['temperature_00'], df_filtered['depth_00'], color=color_temp, -505 label="Temperature (°C)") -506 ax1.set_xlabel("Temperature (°C)", color=color_temp) -507 ax1.set_ylabel('Depth (m)') -508 ax1.tick_params(axis='x', labelcolor=color_temp) -509 mld_cols = [] -510 for col in df.columns: -511 if "MLD" in col and "Actual" not in col: -512 mld_cols.append(df[col]) -513 refdepth_cols = [] -514 for col in df.columns: -515 if "Reference Depth" in col: -516 refdepth_cols.append(df[col]) -517 for idx, mld_col in enumerate(mld_cols): -518 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', -519 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') -520 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', -521 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) -522 if df_filtered['overturn'].any(): -523 plt.title( -524 f"{filename}\n Depth vs. Temperature \n " -525 f"THIS IS AN UNSTABLE WATER COLUMN \n" -526 f"(Higher density fluid lies above lower density fluid)") -527 else: -528 plt.title( -529 f"{filename}\n Depth vs. Temperature \n " -530 f"THIS IS AN UNSTABLE WATER COLUMN \n" -531 f"(Higher density fluid lies above lower density fluid)") -532 ax1.grid(True) -533 lines, labels = ax1.get_legend_handles_labels() -534 ax1.legend(lines, labels, loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=3) -535 plot_path = os.path.join(self._cwd, f"plots/{filename}_temperature_depth_plot.png") -536 plot_folder = os.path.join(self._cwd, "plots") -537 if not (os.path.isdir(plot_folder)): -538 os.mkdir(plot_folder) -539 plt.savefig(plot_path) -540 plt.close(fig) + 485 def plot_depth_temperature_scatter(self): +486 """ +487 Generates a scatter plot of depth vs. temperature. +488 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. +489 Saves the plot as an image file. 
+490 """ +491 df = self._ctd_array.copy() +492 filename = self._filename +493 plt.rcParams.update({'font.size': 16}) +494 df_filtered = df +495 if df_filtered.empty: +496 plt.close() +497 return +498 df_filtered = df_filtered.reset_index(drop=True) +499 fig, ax1 = plt.subplots(figsize=(18, 18)) +500 ax1.invert_yaxis() +501 # Dynamically set y-axis limits based on depth data +502 max_depth = df_filtered['depth_00'].max() +503 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards +504 +505 color_temp = 'tab:blue' +506 ax1.scatter(df_filtered['temperature_00'], df_filtered['depth_00'], color=color_temp, +507 label="Temperature (°C)") +508 ax1.set_xlabel("Temperature (°C)", color=color_temp) +509 ax1.set_ylabel('Depth (m)') +510 ax1.tick_params(axis='x', labelcolor=color_temp) +511 mld_cols = [] +512 for col in df.columns: +513 if "MLD" in col and "Actual" not in col: +514 mld_cols.append(df[col]) +515 refdepth_cols = [] +516 for col in df.columns: +517 if "Reference Depth" in col: +518 refdepth_cols.append(df[col]) +519 for idx, mld_col in enumerate(mld_cols): +520 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', +521 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') +522 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', +523 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) +524 if df_filtered['overturn'].any(): +525 plt.title( +526 f"{filename}\n Depth vs. Temperature \n " +527 f"THIS IS AN UNSTABLE WATER COLUMN \n" +528 f"(Higher density fluid lies above lower density fluid)") +529 else: +530 plt.title( +531 f"{filename}\n Depth vs. Temperature \n " +532 f"THIS IS AN UNSTABLE WATER COLUMN \n" +533 f"(Higher density fluid lies above lower density fluid)") +534 ax1.grid(True) +535 lines, labels = ax1.get_legend_handles_labels() +536 ax1.legend(lines, labels, loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=3) +537 plot_path = os.path.join(self._cwd, f"plots/{filename}_temperature_depth_plot.png") +538 plot_folder = os.path.join(self._cwd, "plots") +539 if not (os.path.isdir(plot_folder)): +540 os.mkdir(plot_folder) +541 plt.savefig(plot_path) +542 plt.close(fig) @@ -3426,341 +3447,346 @@ Parameters - 542 class Utility: -543 """ -544 Utility -545 -------- -546 Utility class for CTD data processing. -547 -548 Attributes -549 ---------- -550 filename : str -551 Filename of the RSK file. -552 mastersheet : str -553 Path to the master sheet Excel file. -554 """ -555 -556 def __init__(self, filename): -557 """ -558 Initialize a new Utility object. -559 Parameters -560 ---------- -561 filename : str -562 The filename of the RSK file. -563 """ -564 self.filename = filename -565 self.mastersheet = os.path.join(_get_cwd(), CTD.master_sheet_path) -566 -567 def no_values_in_object(self, object_to_check): -568 """ -569 Checks if the given object is None, empty, or has a length greater than 0. -570 Returns True if the object has no values, False otherwise. -571 -572 Parameters -573 ---------- -574 object_to_check : object -575 The object to check for values. -576 Returns -577 -------- -578 bool -579 True if the object has no values, False otherwise. -580 """ -581 if isinstance(object_to_check, type(None)): -582 return True -583 if object_to_check.empty: + 544 class Utility: +545 """ +546 Utility +547 -------- +548 Utility class for CTD data processing. +549 +550 Attributes +551 ---------- +552 filename : str +553 Filename of the RSK file. 
+554 mastersheet : str +555 Path to the master sheet Excel file. +556 """ +557 +558 def __init__(self, filename): +559 """ +560 Initialize a new Utility object. +561 Parameters +562 ---------- +563 filename : str +564 The filename of the RSK file. +565 """ +566 self.filename = filename +567 self.mastersheet = os.path.join(_get_cwd(), CTD.master_sheet_path) +568 +569 def no_values_in_object(self, object_to_check): +570 """ +571 Checks if the given object is None, empty, or has a length greater than 0. +572 Returns True if the object has no values, False otherwise. +573 +574 Parameters +575 ---------- +576 object_to_check : object +577 The object to check for values. +578 Returns +579 -------- +580 bool +581 True if the object has no values, False otherwise. +582 """ +583 if isinstance(object_to_check, type(None)): 584 return True -585 if len(object_to_check) > 0: -586 return False -587 -588 def process_master_sheet(self, master_sheet_path, filename): -589 """ -590 Extracts the date and time components from the filename and compares them with the data -591 in the master sheet. Calculates the absolute differences between the dates and times to -592 find the closest match. Returns the estimated latitude, longitude, and updated filename -593 based on the closest match. -594 -595 Parameters -596 ---------- -597 master_sheet_path : str -598 The path to the master sheet Excel file. -599 -600 filename : str -601 The filename of the RSK file. -602 -603 Returns -604 ------- -605 tuple -606 A tuple containing the estimated latitude, longitude, and updated filename. -607 """ -608 -609 def get_date_from_string(filename): -610 try: -611 year = filename.split('_')[1][:4] -612 month = filename.split('_')[1][4:6] -613 day = filename.split('_')[1][6:] -614 hour = filename.split('_')[2][0:2] -615 minute = filename.split('_')[2][2:4] -616 time = f"{hour}:{minute}" -617 return float(year), float(month), float(day), time -618 except: -619 return None, None, None, None -620 -621 # Function to calculate the absolute difference between two dates -622 def date_difference(row, target_year, target_month, target_day): -623 return abs(row['year'] - target_year) + abs(row['month'] - target_month) + abs( -624 row['day'] - target_day) -625 -626 # Function to calculate the absolute difference between two times -627 def time_difference(target_time, df_time): -628 df_time_str = str(df_time) -629 target_hour, target_minute = [int(target_time.split(':')[0]), int(target_time.split(':')[1])] -630 try: -631 df_hour, df_minute = [int(df_time_str.split(':')[0]), int(df_time_str.split(':')[1])] -632 except: -633 return None -634 return abs((target_hour * 60 + target_minute) - (df_hour * 60 + df_minute)) -635 -636 # Load the master sheet -637 master_df = pd.read_excel(master_sheet_path) -638 # Get date and time components from the filename -639 year, month, day, time = get_date_from_string(filename) -640 if year is None: -641 return -642 # Calculate absolute differences for each row in 'master_df' -643 master_df['date_difference'] = master_df.apply(date_difference, args=(year, month, day), axis=1) -644 master_df['time_difference'] = master_df['time_local'].apply(lambda x: time_difference(time, x)) -645 # Find the rows with the smallest total difference for date -646 smallest_date_difference = master_df['date_difference'].min() -647 closest_date_rows = master_df[master_df['date_difference'] == smallest_date_difference] -648 # Check if time_difference returns None -649 if closest_date_rows['time_difference'].isnull().any(): -650 
closest_time_time = None -651 closest_row_overall = closest_date_rows.iloc[0] -652 else: -653 # If there are multiple rows with the smallest date difference, select the row with the smallest time difference -654 if len(closest_date_rows) > 1: -655 closest_time_row = closest_date_rows.loc[closest_date_rows['time_difference'].idxmin()] -656 closest_row_overall = closest_time_row -657 closest_time_time = closest_row_overall['time_local'] -658 else: -659 closest_row_overall = closest_date_rows.iloc[0] -660 closest_time_time = closest_row_overall['time_local'] -661 latitude = closest_row_overall['latitude'] -662 longitude = closest_row_overall['longitude'] -663 unique_id = closest_row_overall.iloc[0] -664 RBRfilename = filename + "_gpscm" -665 # Access the closest date components -666 closest_date_year = closest_row_overall['year'] -667 closest_date_month = closest_row_overall['month'] -668 closest_date_day = closest_row_overall['day'] -669 # Print the closest date and time -670 print("|-ESTIMATION ALERT-|") -671 print("Had to guess location on file: " + filename) -672 print("Unique ID: " + unique_id) -673 print("Closest Date (Year, Month, Day):", closest_date_year, closest_date_month, closest_date_day) -674 print("Lat: " + str(latitude)) -675 print("Long: " + str(longitude)) -676 if closest_time_time: -677 print("Closest Time:", closest_time_time) -678 print("====================") -679 return latitude, longitude, RBRfilename -680 -681 def get_sample_location(self, rsk, filename): -682 """ -683 Retrieves the sample location data from the RSK file. If no location data is found, -684 it attempts to estimate the location using the master sheet. Returns the latitude, -685 longitude, and updated filename. -686 -687 Parameters -688 ---------- -689 rsk : RSK -690 Ruskin object of the RSK file. -691 filename : str -692 The filename of the RSK file. +585 if object_to_check.empty: +586 return True +587 if len(object_to_check) > 0: +588 return False +589 +590 def process_master_sheet(self, master_sheet_path, filename): +591 """ +592 Extracts the date and time components from the filename and compares them with the data +593 in the master sheet. Calculates the absolute differences between the dates and times to +594 find the closest match. Returns the estimated latitude, longitude, and updated filename +595 based on the closest match. +596 +597 Parameters +598 ---------- +599 master_sheet_path : str +600 The path to the master sheet Excel file. +601 +602 filename : str +603 The filename of the RSK file. +604 +605 Returns +606 ------- +607 tuple +608 A tuple containing the estimated latitude, longitude, and updated filename. 
+609 """ +610 +611 def get_date_from_string(filename): +612 try: +613 year = filename.split('_')[1][:4] +614 month = filename.split('_')[1][4:6] +615 day = filename.split('_')[1][6:] +616 hour = filename.split('_')[2][0:2] +617 minute = filename.split('_')[2][2:4] +618 time = f"{hour}:{minute}" +619 return float(year), float(month), float(day), time +620 except: +621 return None, None, None, None +622 +623 # Function to calculate the absolute difference between two dates +624 def date_difference(row, target_year, target_month, target_day): +625 return abs(row['year'] - target_year) + abs(row['month'] - target_month) + abs( +626 row['day'] - target_day) +627 +628 # Function to calculate the absolute difference between two times +629 def time_difference(target_time, df_time): +630 df_time_str = str(df_time) +631 target_hour, target_minute = [int(target_time.split(':')[0]), int(target_time.split(':')[1])] +632 try: +633 df_hour, df_minute = [int(df_time_str.split(':')[0]), int(df_time_str.split(':')[1])] +634 except: +635 return None +636 return abs((target_hour * 60 + target_minute) - (df_hour * 60 + df_minute)) +637 +638 # Check if the master sheet is already cached +639 if CTD._cached_master_sheet is None: +640 # Load the master sheet and cache it +641 CTD._cached_master_sheet = pd.read_excel(master_sheet_path) +642 +643 # Use the cached master sheet data +644 master_df = CTD._cached_master_sheet.copy() +645 # Get date and time components from the filename +646 year, month, day, time = get_date_from_string(filename) +647 if year is None: +648 return +649 # Calculate absolute differences for each row in 'master_df' +650 master_df['date_difference'] = master_df.apply(date_difference, args=(year, month, day), axis=1) +651 master_df['time_difference'] = master_df['time_local'].apply(lambda x: time_difference(time, x)) +652 # Find the rows with the smallest total difference for date +653 smallest_date_difference = master_df['date_difference'].min() +654 closest_date_rows = master_df[master_df['date_difference'] == smallest_date_difference] +655 # Check if time_difference returns None +656 if closest_date_rows['time_difference'].isnull().any(): +657 closest_time_time = None +658 closest_row_overall = closest_date_rows.iloc[0] +659 else: +660 # If there are multiple rows with the smallest date difference, select the row with the smallest time difference +661 if len(closest_date_rows) > 1: +662 closest_time_row = closest_date_rows.loc[closest_date_rows['time_difference'].idxmin()] +663 closest_row_overall = closest_time_row +664 closest_time_time = closest_row_overall['time_local'] +665 else: +666 closest_row_overall = closest_date_rows.iloc[0] +667 closest_time_time = closest_row_overall['time_local'] +668 latitude = closest_row_overall['latitude'] +669 longitude = closest_row_overall['longitude'] +670 unique_id = closest_row_overall.iloc[0] +671 RBRfilename = filename + "_gpscm" +672 # Access the closest date components +673 closest_date_year = closest_row_overall['year'] +674 closest_date_month = closest_row_overall['month'] +675 closest_date_day = closest_row_overall['day'] +676 # Print the closest date and time +677 print("|-ESTIMATION ALERT-|") +678 print("Had to guess location on file: " + filename) +679 print("Unique ID: " + unique_id) +680 print("Closest Date (Year, Month, Day):", closest_date_year, closest_date_month, closest_date_day) +681 print("Lat: " + str(latitude)) +682 print("Long: " + str(longitude)) +683 if closest_time_time: +684 print("Closest Time:", closest_time_time) +685 
print("====================") +686 return latitude, longitude, RBRfilename +687 +688 def get_sample_location(self, rsk, filename): +689 """ +690 Retrieves the sample location data from the RSK file. If no location data is found, +691 it attempts to estimate the location using the master sheet. Returns the latitude, +692 longitude, and updated filename. 693 -694 Returns -695 ------- -696 tuple -697 A tuple containing the latitude associated with the sample, longitude associated with the sample, -698 and the filename, adds _gps if the location was in the ruskin file, -699 _gpscm if located via mastersheet, or _gpserror if unable to locate. -700 """ -701 # Adding geo data, assumes no drift and uses the first lat long in the file if there is one -702 geo_data_length = len(list(itertools.islice(rsk.geodata(), None))) -703 if geo_data_length < 1: -704 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( -705 self.mastersheet, filename) -706 return latitude_intermediate, longitude_intermediate, filename -707 else: -708 for geo in itertools.islice(rsk.geodata(), None): -709 # Is there geo data? -710 if geo.latitude is not None: -711 # If there is, is it from the southern ocean? -712 if not (geo.latitude > -60): -713 try: -714 latitude_intermediate = geo.latitude[0] -715 longitude_intermediate = geo.longitude[0] -716 filename += "_gps" -717 return latitude_intermediate, longitude_intermediate, filename -718 except: -719 latitude_intermediate = geo.latitude -720 longitude_intermediate = geo.longitude -721 filename += "_gps" -722 return latitude_intermediate, longitude_intermediate, filename -723 else: -724 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( -725 self.mastersheet, filename) -726 return latitude_intermediate, longitude_intermediate, filename -727 else: -728 return None, None, filename + 'gpserror' -729 -730 def remove_sample_timezone_indicator(self, df): -731 """ -732 Removes the timezone indicator (e.g., '+00:00') from the 'timestamp' column of the -733 given DataFrame. Returns the updated DataFrame. -734 -735 Parameters -736 ---------- -737 df : DataFrame -738 The DataFrame to process. -739 -740 Returns -741 ------- -742 DataFrame -743 The updated DataFrame with the timezone indicator removed. -744 """ -745 if self.no_values_in_object(df): -746 return None -747 if 'timestamp' in df.columns: -748 df['timestamp'] = df['timestamp'].astype(str).str.split('+').str[0] -749 return df -750 else: -751 return df -752 -753 def remove_rows_with_negative_depth(self, df): -754 """ -755 Removes rows from the given DataFrame where the 'depth_00' column has negative values. -756 Returns the updated DataFrame. -757 -758 Parameter -759 --------- -760 df : DataFrame -761 The DataFrame to process. -762 -763 Returns -764 ------- -765 DataFrame -766 The updated DataFrame with rows containing negative depth values removed. -767 """ -768 if self.no_values_in_object(df): -769 return None -770 if 'depth_00' in df.columns: -771 df = df[df['depth_00'] >= 0].reset_index(drop=True) -772 else: -773 return None -774 if self.no_values_in_object(df): -775 return None -776 return df.copy() -777 -778 def remove_rows_with_negative_salinity(self, df): -779 """ -780 Removes rows from the given DataFrame where the 'salinity_00' column has negative values. -781 Returns the updated DataFrame. -782 -783 Parameters -784 ---------- -785 df: DataFrame -786 The DataFrame to process. 
-787 -788 Returns -789 ------- -790 DataFrame -791 The updated DataFrame with rows containing negative salinity values removed. -792 """ -793 if self.no_values_in_object(df): -794 return None -795 if 'salinity_00' in df.columns: -796 df = df[df['salinity_00'] >= 0].reset_index(drop=True) -797 else: -798 return None -799 if self.no_values_in_object(df): -800 return None -801 return df.copy() -802 -803 def remove_rows_with_negative_pressure(self, df): -804 """ -805 Removes rows from the given DataFrame where the 'pressure_00' column has negative values. -806 Returns the updated DataFrame. -807 -808 Parameters -809 ---------- -810 df: DataFrame -811 The DataFrame to process. -812 -813 Returns -814 ------- -815 DataFrame -816 The updated DataFrame with rows containing negative pressure values removed. -817 """ -818 if self.no_values_in_object(df): -819 return None -820 if 'pressure_00' in df.columns: -821 df = df[df['pressure_00'] >= 0].reset_index(drop=True) -822 else: -823 return None -824 if self.no_values_in_object(df): -825 return None -826 return df.copy() -827 -828 def remove_rows_with_negative_salinityabs(self, df): -829 """ -830 Removes rows from the given DataFrame where the 'salinityabs' column has negative values. -831 Returns the updated DataFrame. -832 -833 Parameters -834 ---------- -835 df: DataFrame -836 The DataFrame to process. -837 -838 Returns -839 ------- -840 DataFrame -841 The updated DataFrame with rows containing negative absolute salinity values removed. -842 """ -843 if self.no_values_in_object(df): -844 return None -845 if 'salinityabs' in df.columns: -846 df = df[df['salinityabs'] >= 0].reset_index(drop=True) -847 else: -848 return None -849 if self.no_values_in_object(df): -850 return None -851 return df.copy() -852 -853 def remove_rows_with_negative_density(self, df): -854 """ -855 Removes rows from the given DataFrame where the 'density' column has negative values. -856 Returns the updated DataFrame. -857 -858 Parameters -859 ---------- -860 df: DataFrame -861 The DataFrame to process. -862 -863 Returns -864 ------- -865 DataFrame -866 The updated DataFrame with rows containing negative density values removed. -867 """ -868 if self.no_values_in_object(df): -869 return None -870 if 'density' in df.columns: -871 df = df[df['density'] >= 0].reset_index(drop=True) -872 else: -873 return None -874 if self.no_values_in_object(df): -875 return None -876 return df.copy() +694 Parameters +695 ---------- +696 rsk : RSK +697 Ruskin object of the RSK file. +698 filename : str +699 The filename of the RSK file. +700 +701 Returns +702 ------- +703 tuple +704 A tuple containing the latitude associated with the sample, longitude associated with the sample, +705 and the filename, adds _gps if the location was in the ruskin file, +706 _gpscm if located via mastersheet, or _gpserror if unable to locate. +707 """ +708 # Adding geo data, assumes no drift and uses the first lat long in the file if there is one +709 geo_data_length = len(list(itertools.islice(rsk.geodata(), None))) +710 if geo_data_length < 1: +711 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( +712 self.mastersheet, filename) +713 return latitude_intermediate, longitude_intermediate, filename +714 else: +715 for geo in itertools.islice(rsk.geodata(), None): +716 # Is there geo data? +717 if geo.latitude is not None: +718 # If there is, is it from the southern ocean? 
+719 if not (geo.latitude > -60): +720 try: +721 latitude_intermediate = geo.latitude[0] +722 longitude_intermediate = geo.longitude[0] +723 filename += "_gps" +724 return latitude_intermediate, longitude_intermediate, filename +725 except: +726 latitude_intermediate = geo.latitude +727 longitude_intermediate = geo.longitude +728 filename += "_gps" +729 return latitude_intermediate, longitude_intermediate, filename +730 else: +731 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( +732 self.mastersheet, filename) +733 return latitude_intermediate, longitude_intermediate, filename +734 else: +735 return None, None, filename + 'gpserror' +736 +737 def remove_sample_timezone_indicator(self, df): +738 """ +739 Removes the timezone indicator (e.g., '+00:00') from the 'timestamp' column of the +740 given DataFrame. Returns the updated DataFrame. +741 +742 Parameters +743 ---------- +744 df : DataFrame +745 The DataFrame to process. +746 +747 Returns +748 ------- +749 DataFrame +750 The updated DataFrame with the timezone indicator removed. +751 """ +752 if self.no_values_in_object(df): +753 return None +754 if 'timestamp' in df.columns: +755 df['timestamp'] = df['timestamp'].astype(str).str.split('+').str[0] +756 return df +757 else: +758 return df +759 +760 def remove_rows_with_negative_depth(self, df): +761 """ +762 Removes rows from the given DataFrame where the 'depth_00' column has negative values. +763 Returns the updated DataFrame. +764 +765 Parameter +766 --------- +767 df : DataFrame +768 The DataFrame to process. +769 +770 Returns +771 ------- +772 DataFrame +773 The updated DataFrame with rows containing negative depth values removed. +774 """ +775 if self.no_values_in_object(df): +776 return None +777 if 'depth_00' in df.columns: +778 df = df[df['depth_00'] >= 0].reset_index(drop=True) +779 else: +780 return None +781 if self.no_values_in_object(df): +782 return None +783 return df.copy() +784 +785 def remove_rows_with_negative_salinity(self, df): +786 """ +787 Removes rows from the given DataFrame where the 'salinity_00' column has negative values. +788 Returns the updated DataFrame. +789 +790 Parameters +791 ---------- +792 df: DataFrame +793 The DataFrame to process. +794 +795 Returns +796 ------- +797 DataFrame +798 The updated DataFrame with rows containing negative salinity values removed. +799 """ +800 if self.no_values_in_object(df): +801 return None +802 if 'salinity_00' in df.columns: +803 df = df[df['salinity_00'] >= 0].reset_index(drop=True) +804 else: +805 return None +806 if self.no_values_in_object(df): +807 return None +808 return df.copy() +809 +810 def remove_rows_with_negative_pressure(self, df): +811 """ +812 Removes rows from the given DataFrame where the 'pressure_00' column has negative values. +813 Returns the updated DataFrame. +814 +815 Parameters +816 ---------- +817 df: DataFrame +818 The DataFrame to process. +819 +820 Returns +821 ------- +822 DataFrame +823 The updated DataFrame with rows containing negative pressure values removed. +824 """ +825 if self.no_values_in_object(df): +826 return None +827 if 'pressure_00' in df.columns: +828 df = df[df['pressure_00'] >= 0].reset_index(drop=True) +829 else: +830 return None +831 if self.no_values_in_object(df): +832 return None +833 return df.copy() +834 +835 def remove_rows_with_negative_salinityabs(self, df): +836 """ +837 Removes rows from the given DataFrame where the 'salinityabs' column has negative values. +838 Returns the updated DataFrame. 
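The updated process_master_sheet above reads the master sheet once and stores it on the class (CTD._cached_master_sheet), so repeated location lookups skip the expensive Excel parse. The pattern in isolation — class name and path here are illustrative:

```python
import pandas as pd

class SheetCache:
    _cached = None  # shared across all instances, like CTD._cached_master_sheet

    @classmethod
    def load(cls, path):
        if cls._cached is None:
            cls._cached = pd.read_excel(path)  # expensive parse happens once
        return cls._cached.copy()              # hand out copies so callers can't mutate the cache
```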
+839
+840 Parameters
+841 ----------
+842 df: DataFrame
+843 The DataFrame to process.
+844
+845 Returns
+846 -------
+847 DataFrame
+848 The updated DataFrame with rows containing negative absolute salinity values removed.
+849 """
+850 if self.no_values_in_object(df):
+851 return None
+852 if 'salinityabs' in df.columns:
+853 df = df[df['salinityabs'] >= 0].reset_index(drop=True)
+854 else:
+855 return None
+856 if self.no_values_in_object(df):
+857 return None
+858 return df.copy()
+859
+860 def remove_rows_with_negative_density(self, df):
+861 """
+862 Removes rows from the given DataFrame where the 'density' column has negative values.
+863 Returns the updated DataFrame.
+864
+865 Parameters
+866 ----------
+867 df: DataFrame
+868 The DataFrame to process.
+869
+870 Returns
+871 -------
+872 DataFrame
+873 The updated DataFrame with rows containing negative density values removed.
+874 """
+875 if self.no_values_in_object(df):
+876 return None
+877 if 'density' in df.columns:
+878 df = df[df['density'] >= 0].reset_index(drop=True)
+879 else:
+880 return None
+881 if self.no_values_in_object(df):
+882 return None
+883 return df.copy()
@@ -3789,16 +3815,16 @@ Attributes
- 556 def __init__(self, filename):
-557 """
-558 Initialize a new Utility object.
-559 Parameters
-560 ----------
-561 filename : str
-562 The filename of the RSK file.
-563 """
-564 self.filename = filename
-565 self.mastersheet = os.path.join(_get_cwd(), CTD.master_sheet_path)
+ 558 def __init__(self, filename):
+559 """
+560 Initialize a new Utility object.
+561 Parameters
+562 ----------
+563 filename : str
+564 The filename of the RSK file.
+565 """
+566 self.filename = filename
+567 self.mastersheet = os.path.join(_get_cwd(), CTD.master_sheet_path)
@@ -3847,26 +3873,26 @@ Parameters
- 567 def no_values_in_object(self, object_to_check):
-568 """
-569 Checks if the given object is None, empty, or has a length greater than 0.
-570 Returns True if the object has no values, False otherwise.
-571
-572 Parameters
-573 ----------
-574 object_to_check : object
-575 The object to check for values.
-576 Returns
-577 --------
-578 bool
-579 True if the object has no values, False otherwise.
-580 """
-581 if isinstance(object_to_check, type(None)):
-582 return True
-583 if object_to_check.empty:
584 return True
-585 if len(object_to_check) > 0:
-586 return False
+ 569 def no_values_in_object(self, object_to_check):
+570 """
+571 Checks whether the given object is None, empty, or has zero length.
+572 Returns True if the object has no values, False otherwise.
+573
+574 Parameters
+575 ----------
+576 object_to_check : object
+577 The object to check for values.
+578 Returns
+579 -------
+580 bool
+581 True if the object has no values, False otherwise.
+582 """
+583 if isinstance(object_to_check, type(None)):
584 return True
+585 if object_to_check.empty:
+586 return True
+587 if len(object_to_check) > 0:
+588 return False
@@ -3900,98 +3926,103 @@ Returns
- 588 def process_master_sheet(self, master_sheet_path, filename):
-589 """
-590 Extracts the date and time components from the filename and compares them with the data
-591 in the master sheet. Calculates the absolute differences between the dates and times to
-592 find the closest match. Returns the estimated latitude, longitude, and updated filename
-593 based on the closest match.
-594
-595 Parameters
-596 ----------
-597 master_sheet_path : str
-598 The path to the master sheet Excel file.
-599
-600 filename : str
-601 The filename of the RSK file.
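# ----------------------------------------------------------------------------
# Editorial sketch: the five remove_rows_with_negative_* helpers above share
# the same guard / filter / guard pattern. A single generic helper
# (hypothetical, not part of CTDFjorder) makes that pattern explicit; only
# pandas is assumed.
import pandas as pd

def drop_negative_rows(df: pd.DataFrame, column: str):
    """Return a copy of df with negative `column` values removed, or None if nothing remains."""
    if df is None or df.empty or column not in df.columns:
        return None
    filtered = df[df[column] >= 0].reset_index(drop=True)
    return filtered.copy() if not filtered.empty else None

# Example: drop_negative_rows(ctd_df, 'depth_00') mirrors remove_rows_with_negative_depth.
# ----------------------------------------------------------------------------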
-602 -603 Returns -604 ------- -605 tuple -606 A tuple containing the estimated latitude, longitude, and updated filename. -607 """ -608 -609 def get_date_from_string(filename): -610 try: -611 year = filename.split('_')[1][:4] -612 month = filename.split('_')[1][4:6] -613 day = filename.split('_')[1][6:] -614 hour = filename.split('_')[2][0:2] -615 minute = filename.split('_')[2][2:4] -616 time = f"{hour}:{minute}" -617 return float(year), float(month), float(day), time -618 except: -619 return None, None, None, None -620 -621 # Function to calculate the absolute difference between two dates -622 def date_difference(row, target_year, target_month, target_day): -623 return abs(row['year'] - target_year) + abs(row['month'] - target_month) + abs( -624 row['day'] - target_day) -625 -626 # Function to calculate the absolute difference between two times -627 def time_difference(target_time, df_time): -628 df_time_str = str(df_time) -629 target_hour, target_minute = [int(target_time.split(':')[0]), int(target_time.split(':')[1])] -630 try: -631 df_hour, df_minute = [int(df_time_str.split(':')[0]), int(df_time_str.split(':')[1])] -632 except: -633 return None -634 return abs((target_hour * 60 + target_minute) - (df_hour * 60 + df_minute)) -635 -636 # Load the master sheet -637 master_df = pd.read_excel(master_sheet_path) -638 # Get date and time components from the filename -639 year, month, day, time = get_date_from_string(filename) -640 if year is None: -641 return -642 # Calculate absolute differences for each row in 'master_df' -643 master_df['date_difference'] = master_df.apply(date_difference, args=(year, month, day), axis=1) -644 master_df['time_difference'] = master_df['time_local'].apply(lambda x: time_difference(time, x)) -645 # Find the rows with the smallest total difference for date -646 smallest_date_difference = master_df['date_difference'].min() -647 closest_date_rows = master_df[master_df['date_difference'] == smallest_date_difference] -648 # Check if time_difference returns None -649 if closest_date_rows['time_difference'].isnull().any(): -650 closest_time_time = None -651 closest_row_overall = closest_date_rows.iloc[0] -652 else: -653 # If there are multiple rows with the smallest date difference, select the row with the smallest time difference -654 if len(closest_date_rows) > 1: -655 closest_time_row = closest_date_rows.loc[closest_date_rows['time_difference'].idxmin()] -656 closest_row_overall = closest_time_row -657 closest_time_time = closest_row_overall['time_local'] -658 else: -659 closest_row_overall = closest_date_rows.iloc[0] -660 closest_time_time = closest_row_overall['time_local'] -661 latitude = closest_row_overall['latitude'] -662 longitude = closest_row_overall['longitude'] -663 unique_id = closest_row_overall.iloc[0] -664 RBRfilename = filename + "_gpscm" -665 # Access the closest date components -666 closest_date_year = closest_row_overall['year'] -667 closest_date_month = closest_row_overall['month'] -668 closest_date_day = closest_row_overall['day'] -669 # Print the closest date and time -670 print("|-ESTIMATION ALERT-|") -671 print("Had to guess location on file: " + filename) -672 print("Unique ID: " + unique_id) -673 print("Closest Date (Year, Month, Day):", closest_date_year, closest_date_month, closest_date_day) -674 print("Lat: " + str(latitude)) -675 print("Long: " + str(longitude)) -676 if closest_time_time: -677 print("Closest Time:", closest_time_time) -678 print("====================") -679 return latitude, longitude, RBRfilename + 590 def 
process_master_sheet(self, master_sheet_path, filename):
+591 """
+592 Extracts the date and time components from the filename and compares them with the data
+593 in the master sheet. Calculates the absolute differences between the dates and times to
+594 find the closest match. Returns the estimated latitude, longitude, and updated filename
+595 based on the closest match.
+596
+597 Parameters
+598 ----------
+599 master_sheet_path : str
+600 The path to the master sheet Excel file.
+601
+602 filename : str
+603 The filename of the RSK file.
+604
+605 Returns
+606 -------
+607 tuple
+608 A tuple containing the estimated latitude, longitude, and updated filename.
+609 """
+610
+611 def get_date_from_string(filename):
+612 try:
+613 year = filename.split('_')[1][:4]
+614 month = filename.split('_')[1][4:6]
+615 day = filename.split('_')[1][6:]
+616 hour = filename.split('_')[2][0:2]
+617 minute = filename.split('_')[2][2:4]
+618 time = f"{hour}:{minute}"
+619 return float(year), float(month), float(day), time
+620 except Exception:
+621 return None, None, None, None
+622
+623 # Function to calculate the absolute difference between two dates
+624 def date_difference(row, target_year, target_month, target_day):
+625 return abs(row['year'] - target_year) + abs(row['month'] - target_month) + abs(
+626 row['day'] - target_day)
+627
+628 # Function to calculate the absolute difference between two times
+629 def time_difference(target_time, df_time):
+630 df_time_str = str(df_time)
+631 target_hour, target_minute = [int(target_time.split(':')[0]), int(target_time.split(':')[1])]
+632 try:
+633 df_hour, df_minute = [int(df_time_str.split(':')[0]), int(df_time_str.split(':')[1])]
+634 except Exception:
+635 return None
+636 return abs((target_hour * 60 + target_minute) - (df_hour * 60 + df_minute))
+637
+638 # Check if the master sheet is already cached
+639 if CTD._cached_master_sheet is None:
+640 # Load the master sheet and cache it
+641 CTD._cached_master_sheet = pd.read_excel(master_sheet_path)
+642
+643 # Use the cached master sheet data
+644 master_df = CTD._cached_master_sheet.copy()
+645 # Get date and time components from the filename
+646 year, month, day, time = get_date_from_string(filename)
+647 if year is None:
+648 return None, None, filename
+649 # Calculate absolute differences for each row in 'master_df'
+650 master_df['date_difference'] = master_df.apply(date_difference, args=(year, month, day), axis=1)
+651 master_df['time_difference'] = master_df['time_local'].apply(lambda x: time_difference(time, x))
+652 # Find the rows with the smallest total difference for date
+653 smallest_date_difference = master_df['date_difference'].min()
+654 closest_date_rows = master_df[master_df['date_difference'] == smallest_date_difference]
+655 # Check if time_difference returns None
+656 if closest_date_rows['time_difference'].isnull().any():
+657 closest_time_time = None
+658 closest_row_overall = closest_date_rows.iloc[0]
+659 else:
+660 # If there are multiple rows with the smallest date difference, select the row with the smallest time difference
+661 if len(closest_date_rows) > 1:
+662 closest_time_row = closest_date_rows.loc[closest_date_rows['time_difference'].idxmin()]
+663 closest_row_overall = closest_time_row
+664 closest_time_time = closest_row_overall['time_local']
+665 else:
+666 closest_row_overall = closest_date_rows.iloc[0]
+667 closest_time_time = closest_row_overall['time_local']
+668 latitude = closest_row_overall['latitude']
+669 longitude = closest_row_overall['longitude']
+670 unique_id =
closest_row_overall.iloc[0] +671 RBRfilename = filename + "_gpscm" +672 # Access the closest date components +673 closest_date_year = closest_row_overall['year'] +674 closest_date_month = closest_row_overall['month'] +675 closest_date_day = closest_row_overall['day'] +676 # Print the closest date and time +677 print("|-ESTIMATION ALERT-|") +678 print("Had to guess location on file: " + filename) +679 print("Unique ID: " + unique_id) +680 print("Closest Date (Year, Month, Day):", closest_date_year, closest_date_month, closest_date_day) +681 print("Lat: " + str(latitude)) +682 print("Long: " + str(longitude)) +683 if closest_time_time: +684 print("Closest Time:", closest_time_time) +685 print("====================") +686 return latitude, longitude, RBRfilename @@ -4029,54 +4060,54 @@ Returns - 681 def get_sample_location(self, rsk, filename): -682 """ -683 Retrieves the sample location data from the RSK file. If no location data is found, -684 it attempts to estimate the location using the master sheet. Returns the latitude, -685 longitude, and updated filename. -686 -687 Parameters -688 ---------- -689 rsk : RSK -690 Ruskin object of the RSK file. -691 filename : str -692 The filename of the RSK file. + 688 def get_sample_location(self, rsk, filename): +689 """ +690 Retrieves the sample location data from the RSK file. If no location data is found, +691 it attempts to estimate the location using the master sheet. Returns the latitude, +692 longitude, and updated filename. 693 -694 Returns -695 ------- -696 tuple -697 A tuple containing the latitude associated with the sample, longitude associated with the sample, -698 and the filename, adds _gps if the location was in the ruskin file, -699 _gpscm if located via mastersheet, or _gpserror if unable to locate. -700 """ -701 # Adding geo data, assumes no drift and uses the first lat long in the file if there is one -702 geo_data_length = len(list(itertools.islice(rsk.geodata(), None))) -703 if geo_data_length < 1: -704 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( -705 self.mastersheet, filename) -706 return latitude_intermediate, longitude_intermediate, filename -707 else: -708 for geo in itertools.islice(rsk.geodata(), None): -709 # Is there geo data? -710 if geo.latitude is not None: -711 # If there is, is it from the southern ocean? -712 if not (geo.latitude > -60): -713 try: -714 latitude_intermediate = geo.latitude[0] -715 longitude_intermediate = geo.longitude[0] -716 filename += "_gps" -717 return latitude_intermediate, longitude_intermediate, filename -718 except: -719 latitude_intermediate = geo.latitude -720 longitude_intermediate = geo.longitude -721 filename += "_gps" -722 return latitude_intermediate, longitude_intermediate, filename -723 else: -724 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( -725 self.mastersheet, filename) -726 return latitude_intermediate, longitude_intermediate, filename -727 else: -728 return None, None, filename + 'gpserror' +694 Parameters +695 ---------- +696 rsk : RSK +697 Ruskin object of the RSK file. +698 filename : str +699 The filename of the RSK file. +700 +701 Returns +702 ------- +703 tuple +704 A tuple containing the latitude associated with the sample, longitude associated with the sample, +705 and the filename, adds _gps if the location was in the ruskin file, +706 _gpscm if located via mastersheet, or _gpserror if unable to locate. 
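# ----------------------------------------------------------------------------
# Editorial worked example of the matching rule in process_master_sheet above:
# the date difference is |Δyear| + |Δmonth| + |Δday|, and ties are broken by
# the smallest difference in minutes. The sheet rows and target values below
# are synthetic; the filename layout (site_YYYYMMDD_HHMMSS.rsk) is inferred
# from the parsing in get_date_from_string.
import pandas as pd

sheet = pd.DataFrame({
    'year': [2022.0, 2022.0], 'month': [11.0, 11.0], 'day': [17.0, 17.0],
    'time_local': ['13:40', '09:15'],
    'latitude': [-64.80, -64.90], 'longitude': [-63.50, -63.60],
})
year, month, day, time = 2022.0, 11.0, 17.0, '13:45'  # as parsed from a filename
date_diff = ((sheet['year'] - year).abs() + (sheet['month'] - month).abs()
             + (sheet['day'] - day).abs())
closest_dates = sheet[date_diff == date_diff.min()]
to_minutes = lambda t: int(t.split(':')[0]) * 60 + int(t.split(':')[1])
best = closest_dates.loc[
    closest_dates['time_local'].map(to_minutes).sub(to_minutes(time)).abs().idxmin()]
print(best['latitude'], best['longitude'])  # -64.8 -63.5 (13:40 is 5 minutes off)
# ----------------------------------------------------------------------------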
+707 """ +708 # Adding geo data, assumes no drift and uses the first lat long in the file if there is one +709 geo_data_length = len(list(itertools.islice(rsk.geodata(), None))) +710 if geo_data_length < 1: +711 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( +712 self.mastersheet, filename) +713 return latitude_intermediate, longitude_intermediate, filename +714 else: +715 for geo in itertools.islice(rsk.geodata(), None): +716 # Is there geo data? +717 if geo.latitude is not None: +718 # If there is, is it from the southern ocean? +719 if not (geo.latitude > -60): +720 try: +721 latitude_intermediate = geo.latitude[0] +722 longitude_intermediate = geo.longitude[0] +723 filename += "_gps" +724 return latitude_intermediate, longitude_intermediate, filename +725 except: +726 latitude_intermediate = geo.latitude +727 longitude_intermediate = geo.longitude +728 filename += "_gps" +729 return latitude_intermediate, longitude_intermediate, filename +730 else: +731 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( +732 self.mastersheet, filename) +733 return latitude_intermediate, longitude_intermediate, filename +734 else: +735 return None, None, filename + 'gpserror' @@ -4115,28 +4146,28 @@ Returns - 730 def remove_sample_timezone_indicator(self, df): -731 """ -732 Removes the timezone indicator (e.g., '+00:00') from the 'timestamp' column of the -733 given DataFrame. Returns the updated DataFrame. -734 -735 Parameters -736 ---------- -737 df : DataFrame -738 The DataFrame to process. -739 -740 Returns -741 ------- -742 DataFrame -743 The updated DataFrame with the timezone indicator removed. -744 """ -745 if self.no_values_in_object(df): -746 return None -747 if 'timestamp' in df.columns: -748 df['timestamp'] = df['timestamp'].astype(str).str.split('+').str[0] -749 return df -750 else: -751 return df + 737 def remove_sample_timezone_indicator(self, df): +738 """ +739 Removes the timezone indicator (e.g., '+00:00') from the 'timestamp' column of the +740 given DataFrame. Returns the updated DataFrame. +741 +742 Parameters +743 ---------- +744 df : DataFrame +745 The DataFrame to process. +746 +747 Returns +748 ------- +749 DataFrame +750 The updated DataFrame with the timezone indicator removed. +751 """ +752 if self.no_values_in_object(df): +753 return None +754 if 'timestamp' in df.columns: +755 df['timestamp'] = df['timestamp'].astype(str).str.split('+').str[0] +756 return df +757 else: +758 return df @@ -4170,30 +4201,30 @@ Returns - 753 def remove_rows_with_negative_depth(self, df): -754 """ -755 Removes rows from the given DataFrame where the 'depth_00' column has negative values. -756 Returns the updated DataFrame. -757 -758 Parameter -759 --------- -760 df : DataFrame -761 The DataFrame to process. -762 -763 Returns -764 ------- -765 DataFrame -766 The updated DataFrame with rows containing negative depth values removed. -767 """ -768 if self.no_values_in_object(df): -769 return None -770 if 'depth_00' in df.columns: -771 df = df[df['depth_00'] >= 0].reset_index(drop=True) -772 else: -773 return None -774 if self.no_values_in_object(df): -775 return None -776 return df.copy() + 760 def remove_rows_with_negative_depth(self, df): +761 """ +762 Removes rows from the given DataFrame where the 'depth_00' column has negative values. +763 Returns the updated DataFrame. +764 +765 Parameter +766 --------- +767 df : DataFrame +768 The DataFrame to process. 
+769 +770 Returns +771 ------- +772 DataFrame +773 The updated DataFrame with rows containing negative depth values removed. +774 """ +775 if self.no_values_in_object(df): +776 return None +777 if 'depth_00' in df.columns: +778 df = df[df['depth_00'] >= 0].reset_index(drop=True) +779 else: +780 return None +781 if self.no_values_in_object(df): +782 return None +783 return df.copy() @@ -4225,30 +4256,30 @@ Returns - 778 def remove_rows_with_negative_salinity(self, df): -779 """ -780 Removes rows from the given DataFrame where the 'salinity_00' column has negative values. -781 Returns the updated DataFrame. -782 -783 Parameters -784 ---------- -785 df: DataFrame -786 The DataFrame to process. -787 -788 Returns -789 ------- -790 DataFrame -791 The updated DataFrame with rows containing negative salinity values removed. -792 """ -793 if self.no_values_in_object(df): -794 return None -795 if 'salinity_00' in df.columns: -796 df = df[df['salinity_00'] >= 0].reset_index(drop=True) -797 else: -798 return None -799 if self.no_values_in_object(df): -800 return None -801 return df.copy() + 785 def remove_rows_with_negative_salinity(self, df): +786 """ +787 Removes rows from the given DataFrame where the 'salinity_00' column has negative values. +788 Returns the updated DataFrame. +789 +790 Parameters +791 ---------- +792 df: DataFrame +793 The DataFrame to process. +794 +795 Returns +796 ------- +797 DataFrame +798 The updated DataFrame with rows containing negative salinity values removed. +799 """ +800 if self.no_values_in_object(df): +801 return None +802 if 'salinity_00' in df.columns: +803 df = df[df['salinity_00'] >= 0].reset_index(drop=True) +804 else: +805 return None +806 if self.no_values_in_object(df): +807 return None +808 return df.copy() @@ -4282,30 +4313,30 @@ Returns - 803 def remove_rows_with_negative_pressure(self, df): -804 """ -805 Removes rows from the given DataFrame where the 'pressure_00' column has negative values. -806 Returns the updated DataFrame. -807 -808 Parameters -809 ---------- -810 df: DataFrame -811 The DataFrame to process. -812 -813 Returns -814 ------- -815 DataFrame -816 The updated DataFrame with rows containing negative pressure values removed. -817 """ -818 if self.no_values_in_object(df): -819 return None -820 if 'pressure_00' in df.columns: -821 df = df[df['pressure_00'] >= 0].reset_index(drop=True) -822 else: -823 return None -824 if self.no_values_in_object(df): -825 return None -826 return df.copy() + 810 def remove_rows_with_negative_pressure(self, df): +811 """ +812 Removes rows from the given DataFrame where the 'pressure_00' column has negative values. +813 Returns the updated DataFrame. +814 +815 Parameters +816 ---------- +817 df: DataFrame +818 The DataFrame to process. +819 +820 Returns +821 ------- +822 DataFrame +823 The updated DataFrame with rows containing negative pressure values removed. +824 """ +825 if self.no_values_in_object(df): +826 return None +827 if 'pressure_00' in df.columns: +828 df = df[df['pressure_00'] >= 0].reset_index(drop=True) +829 else: +830 return None +831 if self.no_values_in_object(df): +832 return None +833 return df.copy() @@ -4339,30 +4370,30 @@ Returns - 828 def remove_rows_with_negative_salinityabs(self, df): -829 """ -830 Removes rows from the given DataFrame where the 'salinityabs' column has negative values. -831 Returns the updated DataFrame. -832 -833 Parameters -834 ---------- -835 df: DataFrame -836 The DataFrame to process. 
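# ----------------------------------------------------------------------------
# Editorial note on remove_sample_timezone_indicator above: splitting on '+'
# strips offsets such as '+00:00' but would leave a negative offset like
# '-05:00' untouched. A timezone-aware alternative (a sketch, pandas only)
# normalizes every offset to naive UTC before dropping the indicator:
import pandas as pd

ts = pd.Series(['2022-11-17 13:45:00+00:00', '2022-11-17 08:45:00-05:00'])
naive_utc = pd.to_datetime(ts, utc=True).dt.tz_localize(None)
print(naive_utc)  # both rows become 2022-11-17 13:45:00
# ----------------------------------------------------------------------------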
-837 -838 Returns -839 ------- -840 DataFrame -841 The updated DataFrame with rows containing negative absolute salinity values removed. -842 """ -843 if self.no_values_in_object(df): -844 return None -845 if 'salinityabs' in df.columns: -846 df = df[df['salinityabs'] >= 0].reset_index(drop=True) -847 else: -848 return None -849 if self.no_values_in_object(df): -850 return None -851 return df.copy() + 835 def remove_rows_with_negative_salinityabs(self, df): +836 """ +837 Removes rows from the given DataFrame where the 'salinityabs' column has negative values. +838 Returns the updated DataFrame. +839 +840 Parameters +841 ---------- +842 df: DataFrame +843 The DataFrame to process. +844 +845 Returns +846 ------- +847 DataFrame +848 The updated DataFrame with rows containing negative absolute salinity values removed. +849 """ +850 if self.no_values_in_object(df): +851 return None +852 if 'salinityabs' in df.columns: +853 df = df[df['salinityabs'] >= 0].reset_index(drop=True) +854 else: +855 return None +856 if self.no_values_in_object(df): +857 return None +858 return df.copy() @@ -4396,30 +4427,30 @@ Returns - 853 def remove_rows_with_negative_density(self, df): -854 """ -855 Removes rows from the given DataFrame where the 'density' column has negative values. -856 Returns the updated DataFrame. -857 -858 Parameters -859 ---------- -860 df: DataFrame -861 The DataFrame to process. -862 -863 Returns -864 ------- -865 DataFrame -866 The updated DataFrame with rows containing negative density values removed. -867 """ -868 if self.no_values_in_object(df): -869 return None -870 if 'density' in df.columns: -871 df = df[df['density'] >= 0].reset_index(drop=True) -872 else: -873 return None -874 if self.no_values_in_object(df): -875 return None -876 return df.copy() + 860 def remove_rows_with_negative_density(self, df): +861 """ +862 Removes rows from the given DataFrame where the 'density' column has negative values. +863 Returns the updated DataFrame. +864 +865 Parameters +866 ---------- +867 df: DataFrame +868 The DataFrame to process. +869 +870 Returns +871 ------- +872 DataFrame +873 The updated DataFrame with rows containing negative density values removed. +874 """ +875 if self.no_values_in_object(df): +876 return None +877 if 'density' in df.columns: +878 df = df[df['density'] >= 0].reset_index(drop=True) +879 else: +880 return None +881 if self.no_values_in_object(df): +882 return None +883 return df.copy() @@ -4454,365 +4485,365 @@ Returns - 879class Calculate: - 880 """ - 881 Calculate - 882 ---------- - 883 - 884 Class for CTD data calculations. - 885 """ - 886 - 887 @staticmethod - 888 def gsw_infunnel(SA, CT, p): - 889 """ - 890 Check if the given Absolute Salinity (SA), Conservative Temperature (CT), - 891 and pressure (p) are within the "oceanographic funnel" for the TEOS-10 75-term equation. - 892 - 893 Parameters - 894 ---------- - 895 SA : Series - 896 Absolute Salinity in g/kg. - 897 CT : Series - 898 Conservative Temperature in degrees Celsius. - 899 p : Series - 900 Sea pressure in dbar (absolute pressure minus 10.1325 dbar). - 901 - 902 Returns - 903 ------- - 904 Series of bool - 905 A boolean array where True indicates the values are inside the "oceanographic funnel". 
- 906 """ - 907 # Ensure all inputs are Series and aligned - 908 if not (isinstance(SA, pd.Series) and isinstance(CT, pd.Series) and ( - 909 isinstance(p, pd.Series) or np.isscalar(p))): - 910 raise CTDError("", "SA, CT, and p must be pandas Series or p a scalar") - 911 - 912 if isinstance(p, pd.Series) and (SA.index.equals(CT.index) and SA.index.equals(p.index)) is False: - 913 raise CTDError("", "Indices of SA, CT, and p must be aligned") - 914 - 915 if np.isscalar(p): - 916 p = pd.Series(p, index=SA.index) - 917 - 918 # Define the funnel conditions - 919 CT_freezing_p = gsw.CT_freezing(SA, p, 0) - 920 CT_freezing_500 = gsw.CT_freezing(SA, 500, 0) + 886class Calculate: + 887 """ + 888 Calculate + 889 ---------- + 890 + 891 Class for CTD data calculations. + 892 """ + 893 + 894 @staticmethod + 895 def gsw_infunnel(SA, CT, p): + 896 """ + 897 Check if the given Absolute Salinity (SA), Conservative Temperature (CT), + 898 and pressure (p) are within the "oceanographic funnel" for the TEOS-10 75-term equation. + 899 + 900 Parameters + 901 ---------- + 902 SA : Series + 903 Absolute Salinity in g/kg. + 904 CT : Series + 905 Conservative Temperature in degrees Celsius. + 906 p : Series + 907 Sea pressure in dbar (absolute pressure minus 10.1325 dbar). + 908 + 909 Returns + 910 ------- + 911 Series of bool + 912 A boolean array where True indicates the values are inside the "oceanographic funnel". + 913 """ + 914 # Ensure all inputs are Series and aligned + 915 if not (isinstance(SA, pd.Series) and isinstance(CT, pd.Series) and ( + 916 isinstance(p, pd.Series) or np.isscalar(p))): + 917 raise CTDError("", "SA, CT, and p must be pandas Series or p a scalar") + 918 + 919 if isinstance(p, pd.Series) and (SA.index.equals(CT.index) and SA.index.equals(p.index)) is False: + 920 raise CTDError("", "Indices of SA, CT, and p must be aligned") 921 - 922 in_funnel = pd.Series(True, index=SA.index) # Default all to True - 923 condition = ( - 924 (p > 8000) | - 925 (SA < 0) | (SA > 42) | - 926 ((p < 500) & (CT < CT_freezing_p)) | - 927 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | - 928 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | - 929 ((p >= 500) & (CT < CT_freezing_500)) | - 930 ((p >= 6500) & (SA < 30)) | - 931 ((p >= 6500) & (CT > 10.0)) | - 932 SA.isna() | CT.isna() | p.isna() - 933 ) - 934 in_funnel[condition] = False - 935 - 936 return in_funnel - 937 - 938 @staticmethod - 939 def calculate_and_drop_salinity_spikes(df): - 940 """ - 941 Calculates and removes salinity spikes from the CTD data based on predefined thresholds for acceptable - 942 changes in salinity with depth. 
- 943 - 944 Parameters - 945 ---------- - 946 df : DataFrame - 947 DataFrame containing depth and salinity data - 948 - 949 Returns - 950 ------- - 951 DataFrame - 952 DataFrame after removing salinity spikes - 953 """ - 954 acceptable_delta_salinity_per_depth = [ - 955 (0.0005, 0.001), - 956 (0.005, 0.01), - 957 (0.05, 0.1), - 958 (0.5, 1) - 959 ] - 960 if df.empty: - 961 return None - 962 # Convert 'depth_00' and 'salinity_00' to numeric, coercing errors - 963 df['depth_00'] = pd.to_numeric(df['depth_00'], errors='coerce') - 964 df['salinity_00'] = pd.to_numeric(df['salinity_00'], errors='coerce') - 965 - 966 # Drop any rows where either 'depth_00' or 'salinity_00' is NaN - 967 df = df.dropna(subset=['depth_00', 'salinity_00']) - 968 - 969 # Check if there is enough depth range to calculate - 970 min_depth = df['depth_00'].min() - 971 max_depth = df['depth_00'].max() - 972 if min_depth == max_depth: - 973 print("Insufficient depth range to calculate.") - 974 return df + 922 if np.isscalar(p): + 923 p = pd.Series(p, index=SA.index) + 924 + 925 # Define the funnel conditions + 926 CT_freezing_p = gsw.CT_freezing(SA, p, 0) + 927 CT_freezing_500 = gsw.CT_freezing(SA, 500, 0) + 928 + 929 in_funnel = pd.Series(True, index=SA.index) # Default all to True + 930 condition = ( + 931 (p > 8000) | + 932 (SA < 0) | (SA > 42) | + 933 ((p < 500) & (CT < CT_freezing_p)) | + 934 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | + 935 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | + 936 ((p >= 500) & (CT < CT_freezing_500)) | + 937 ((p >= 6500) & (SA < 30)) | + 938 ((p >= 6500) & (CT > 10.0)) | + 939 SA.isna() | CT.isna() | p.isna() + 940 ) + 941 in_funnel[condition] = False + 942 + 943 return in_funnel + 944 + 945 @staticmethod + 946 def calculate_and_drop_salinity_spikes(df): + 947 """ + 948 Calculates and removes salinity spikes from the CTD data based on predefined thresholds for acceptable + 949 changes in salinity with depth. 
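# ----------------------------------------------------------------------------
# Editorial usage sketch for gsw_infunnel above, with synthetic values;
# assumes the gsw package and that Calculate is importable from this module.
# SA is in g/kg, CT in °C and p in dbar; the last sample violates SA <= 42.
import pandas as pd

SA = pd.Series([34.7, 35.0, 50.0])
CT = pd.Series([1.5, 2.0, 2.0])
p = pd.Series([100.0, 800.0, 100.0])
print(Calculate.gsw_infunnel(SA, CT, p))  # expected: True, True, False
# ----------------------------------------------------------------------------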
+ 950 + 951 Parameters + 952 ---------- + 953 df : DataFrame + 954 DataFrame containing depth and salinity data + 955 + 956 Returns + 957 ------- + 958 DataFrame + 959 DataFrame after removing salinity spikes + 960 """ + 961 acceptable_delta_salinity_per_depth = [ + 962 (0.0005, 0.001), + 963 (0.005, 0.01), + 964 (0.05, 0.1), + 965 (0.5, 1) + 966 ] + 967 if df.empty: + 968 return None + 969 # Convert 'depth_00' and 'salinity_00' to numeric, coercing errors + 970 df['depth_00'] = pd.to_numeric(df['depth_00'], errors='coerce') + 971 df['salinity_00'] = pd.to_numeric(df['salinity_00'], errors='coerce') + 972 + 973 # Drop any rows where either 'depth_00' or 'salinity_00' is NaN + 974 df = df.dropna(subset=['depth_00', 'salinity_00']) 975 - 976 def recursively_drop(df, depth_range, acceptable_delta, i): - 977 try: - 978 num_points = int((max_depth - min_depth) / depth_range) # Calculate number of points - 979 except: - 980 print("Error in calculating number of points.") - 981 return df - 982 ranges = np.linspace(min_depth, max_depth, num=num_points) - 983 - 984 # Group by these ranges - 985 groups = df.groupby(pd.cut(df['depth_00'], ranges), observed=True) - 986 - 987 # Calculate the min and max salinity for each range and filter ranges where the difference is <= 1 - 988 filtered_groups = groups.filter( - 989 lambda x: abs(x['salinity_00'].max() - x['salinity_00'].min()) <= acceptable_delta) - 990 # Get the indices of the filtered groups - 991 filtered_indices = filtered_groups.index - 992 return filtered_groups + 976 # Check if there is enough depth range to calculate + 977 min_depth = df['depth_00'].min() + 978 max_depth = df['depth_00'].max() + 979 if min_depth == max_depth: + 980 print("Insufficient depth range to calculate.") + 981 return df + 982 + 983 def recursively_drop(df, depth_range, acceptable_delta, i): + 984 try: + 985 num_points = int((max_depth - min_depth) / depth_range) # Calculate number of points + 986 except: + 987 print("Error in calculating number of points.") + 988 return df + 989 ranges = np.linspace(min_depth, max_depth, num=num_points) + 990 + 991 # Group by these ranges + 992 groups = df.groupby(pd.cut(df['depth_00'], ranges), observed=True) 993 - 994 for i, deltas in enumerate(acceptable_delta_salinity_per_depth): - 995 df = recursively_drop(df, deltas[0], deltas[1], i) - 996 return df - 997 - 998 @staticmethod - 999 def calculate_overturns(ctd_array): -1000 """ -1001 Calculates density overturns in the CTD data where denser water lies above lighter water with density -1002 difference of at least 0.05 kg/m³, which may indicate mixing or other dynamic processes. 
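# ----------------------------------------------------------------------------
# Editorial sketch of the binning rule inside calculate_and_drop_salinity_spikes
# above: the profile is cut into uniform depth bins, and a bin survives only if
# its salinity range stays within the acceptable delta for that bin width. The
# profile below is synthetic, with one 5-PSU spike planted at 5.5 m.
import numpy as np
import pandas as pd

df = pd.DataFrame({'depth_00': np.linspace(0.5, 10.0, 20),
                   'salinity_00': 34.0 + np.r_[np.zeros(10), 5.0, np.zeros(9)]})
bins = np.linspace(0.0, 10.0, num=11)  # 1 m bins, as with depth_range = 1
kept = df.groupby(pd.cut(df['depth_00'], bins), observed=True).filter(
    lambda g: g['salinity_00'].max() - g['salinity_00'].min() <= 1)
print(len(df), len(kept))  # 20 18: the bin holding the spike is dropped
# ----------------------------------------------------------------------------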
-1003 -1004 Parameters -1005 ---------- -1006 ctd_array : DataFrame -1007 DataFrame containing depth, density, and timestamp data -1008 -1009 Returns -1010 ------- -1011 DataFrame -1012 DataFrame with identified density overturns -1013 """ -1014 # Sort DataFrame by depth in ascending order -1015 ctd_array = ctd_array.sort_values(by='depth_00', ascending=True) -1016 # Calculate density change and identify overturns -1017 ctd_array['density_change'] = ctd_array[ -1018 'density'].diff() # Difference in density between consecutive measurements -1019 ctd_array['overturn'] = ctd_array['density_change'] < -0.05 -1020 ctd_array = ctd_array.sort_values(by='timestamp', ascending=True) -1021 if 'density_change' in ctd_array.columns: -1022 ctd_array = ctd_array.drop('density_change', axis=1) -1023 return ctd_array -1024 -1025 @staticmethod -1026 def calculate_absolute_density(ctd_array): -1027 """ -1028 Calculates absolute density from the CTD data using the TEOS-10 equations, -1029 ensuring all data points are within the valid oceanographic funnel. -1030 -1031 Parameters -1032 ---------- -1033 ctd_array : DataFrame -1034 DataFrame containing salinity, temperature, and pressure data -1035 -1036 Returns -1037 ------- -1038 Series -1039 Series with calculated absolute density -1040 """ -1041 SA = ctd_array['salinity_00'] -1042 t = ctd_array['temperature_00'] -1043 p = ctd_array['pressure_00'] -1044 CT = gsw.CT_from_t(SA, t, p) -1045 if Calculate.gsw_infunnel(SA, CT, p).all(): -1046 return gsw.density.rho_t_exact(SA, t, p) -1047 else: -1048 raise CTDError("", "Sample not in funnel, could not calculate density.") -1049 -1050 @staticmethod -1051 def calculate_absolute_salinity(ctd_array): -1052 """ -1053 Calculates absolute salinity from practical salinity, pressure, -1054 and geographical coordinates using the TEOS-10 salinity conversion formulas. -1055 -1056 Parameters -1057 ---------- -1058 ctd_array : DataFrame -1059 DataFrame containing practical salinity, pressure, longitude, and latitude data -1060 -1061 Returns -1062 ------- -1063 Series -1064 Series with calculated absolute salinity -1065 """ -1066 SP = ctd_array['salinity_00'] -1067 p = ctd_array['pressure_00'] -1068 lon = ctd_array['longitude'] -1069 lat = ctd_array['latitude'] -1070 return gsw.conversions.SA_from_SP(SP, p, lon, lat) -1071 -1072 @staticmethod -1073 def calculate_mld(densities, depths, reference_depth, delta = 0.03): -1074 """ -1075 Calculates the mixed layer depth (MLD) using the density threshold method. -1076 MLD is the depth at which the density exceeds the reference density -1077 by a predefined amount delta, which defaults to (0.03 kg/m³). + 994 # Calculate the min and max salinity for each range and filter ranges where the difference is <= 1 + 995 filtered_groups = groups.filter( + 996 lambda x: abs(x['salinity_00'].max() - x['salinity_00'].min()) <= acceptable_delta) + 997 # Get the indices of the filtered groups + 998 filtered_indices = filtered_groups.index + 999 return filtered_groups +1000 +1001 for i, deltas in enumerate(acceptable_delta_salinity_per_depth): +1002 df = recursively_drop(df, deltas[0], deltas[1], i) +1003 return df +1004 +1005 @staticmethod +1006 def calculate_overturns(ctd_array): +1007 """ +1008 Calculates density overturns in the CTD data where denser water lies above lighter water with density +1009 difference of at least 0.05 kg/m³, which may indicate mixing or other dynamic processes. 
+1010 +1011 Parameters +1012 ---------- +1013 ctd_array : DataFrame +1014 DataFrame containing depth, density, and timestamp data +1015 +1016 Returns +1017 ------- +1018 DataFrame +1019 DataFrame with identified density overturns +1020 """ +1021 # Sort DataFrame by depth in ascending order +1022 ctd_array = ctd_array.sort_values(by='depth_00', ascending=True) +1023 # Calculate density change and identify overturns +1024 ctd_array['density_change'] = ctd_array[ +1025 'density'].diff() # Difference in density between consecutive measurements +1026 ctd_array['overturn'] = ctd_array['density_change'] < -0.05 +1027 ctd_array = ctd_array.sort_values(by='timestamp', ascending=True) +1028 if 'density_change' in ctd_array.columns: +1029 ctd_array = ctd_array.drop('density_change', axis=1) +1030 return ctd_array +1031 +1032 @staticmethod +1033 def calculate_absolute_density(ctd_array): +1034 """ +1035 Calculates absolute density from the CTD data using the TEOS-10 equations, +1036 ensuring all data points are within the valid oceanographic funnel. +1037 +1038 Parameters +1039 ---------- +1040 ctd_array : DataFrame +1041 DataFrame containing salinity, temperature, and pressure data +1042 +1043 Returns +1044 ------- +1045 Series +1046 Series with calculated absolute density +1047 """ +1048 SA = ctd_array['salinity_00'] +1049 t = ctd_array['temperature_00'] +1050 p = ctd_array['pressure_00'] +1051 CT = gsw.CT_from_t(SA, t, p) +1052 if Calculate.gsw_infunnel(SA, CT, p).all(): +1053 return gsw.density.rho_t_exact(SA, t, p) +1054 else: +1055 raise CTDError("", "Sample not in funnel, could not calculate density.") +1056 +1057 @staticmethod +1058 def calculate_absolute_salinity(ctd_array): +1059 """ +1060 Calculates absolute salinity from practical salinity, pressure, +1061 and geographical coordinates using the TEOS-10 salinity conversion formulas. +1062 +1063 Parameters +1064 ---------- +1065 ctd_array : DataFrame +1066 DataFrame containing practical salinity, pressure, longitude, and latitude data +1067 +1068 Returns +1069 ------- +1070 Series +1071 Series with calculated absolute salinity +1072 """ +1073 SP = ctd_array['salinity_00'] +1074 p = ctd_array['pressure_00'] +1075 lon = ctd_array['longitude'] +1076 lat = ctd_array['latitude'] +1077 return gsw.conversions.SA_from_SP(SP, p, lon, lat) 1078 -1079 Parameters -1080 ---------- -1081 densities : Series -1082 Series of densities -1083 depths : Series -1084 Series of depths corresponding to densities -1085 reference_depth : float -1086 The depth at which to anchor the reference density -1087 delta : float, optional -1088 The difference in density which would indicate the MLD, defaults to 0.03 kg/m. -1089 -1090 Returns -1091 ------- -1092 tuple -1093 A tuple containing the calculated MLD and the reference depth used to calculate MLD. 
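# ----------------------------------------------------------------------------
# Editorial sketch of the flag computed by calculate_overturns above: after
# sorting by depth, a density drop of more than 0.05 kg/m³ between consecutive
# samples marks denser water resting on lighter water. Synthetic profile:
import pandas as pd

profile = pd.DataFrame({'depth_00': [1.0, 2.0, 3.0, 4.0],
                        'density': [1027.10, 1027.20, 1027.05, 1027.30]})
profile = profile.sort_values('depth_00')
profile['overturn'] = profile['density'].diff() < -0.05
print(profile)  # only the 3 m sample is flagged (density fell by 0.15 kg/m³)
# ----------------------------------------------------------------------------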
-1094 """ -1095 # Convert to numeric and ensure no NaNs remain -1096 densities = densities.apply(pd.to_numeric, errors='coerce') -1097 depths = depths.apply(pd.to_numeric, errors='coerce') -1098 densities = densities.dropna(how='any').reset_index(drop=True) -1099 depths = depths.dropna(how='any').reset_index(drop=True) -1100 reference_depth = int(reference_depth) -1101 if len(depths) == 0 or len(densities) == 0: -1102 return None -1103 sorted_data = sorted(zip(depths, densities), key=lambda x: x[0]) -1104 sorted_depths, sorted_densities = zip(*sorted_data) -1105 # Determine reference density -1106 reference_density = None -1107 for i, depth in enumerate(sorted_depths): -1108 if depth >= reference_depth: -1109 if depth == reference_depth: -1110 reference_density = sorted_densities[i] -1111 reference_depth = sorted_depths[i] -1112 else: -1113 # Linear interpolation -1114 try: -1115 reference_density = sorted_densities[i - 1] + ( -1116 (sorted_densities[i] - sorted_densities[i - 1]) * ( -1117 (reference_depth - sorted_depths[i - 1]) / -1118 (sorted_depths[i] - sorted_depths[i - 1]))) -1119 except: -1120 raise CTDError("", -1121 f"Insufficient depth range to calculate MLD. " -1122 f"Maximum sample depth is "f"{depths.max()}, minimum is {depths.min()}") -1123 break -1124 if reference_density is None: -1125 return None -1126 # Find the depth where density exceeds the reference density by more than 0.05 kg/m³ -1127 for depth, density in zip(sorted_depths, sorted_densities): -1128 if density > reference_density + delta and depth >= reference_depth: -1129 return depth, reference_depth -1130 return None # If no depth meets the criterion -1131 -1132 @staticmethod -1133 def calculate_mld_loess(densities, depths, reference_depth, delta = 0.03): -1134 """ -1135 Calculates the mixed layer depth (MLD) using LOESS smoothing to first smooth the density profile and -1136 then determine the depth where the smoothed density exceeds the reference density -1137 by a predefined amount which defaults to 0.03 kg/m³. +1079 @staticmethod +1080 def calculate_mld(densities, depths, reference_depth, delta = 0.03): +1081 """ +1082 Calculates the mixed layer depth (MLD) using the density threshold method. +1083 MLD is the depth at which the density exceeds the reference density +1084 by a predefined amount delta, which defaults to (0.03 kg/m³). +1085 +1086 Parameters +1087 ---------- +1088 densities : Series +1089 Series of densities +1090 depths : Series +1091 Series of depths corresponding to densities +1092 reference_depth : float +1093 The depth at which to anchor the reference density +1094 delta : float, optional +1095 The difference in density which would indicate the MLD, defaults to 0.03 kg/m³. +1096 +1097 Returns +1098 ------- +1099 tuple +1100 A tuple containing the calculated MLD and the reference depth used to calculate MLD. 
+1101 """ +1102 # Convert to numeric and ensure no NaNs remain +1103 densities = densities.apply(pd.to_numeric, errors='coerce') +1104 depths = depths.apply(pd.to_numeric, errors='coerce') +1105 densities = densities.dropna(how='any').reset_index(drop=True) +1106 depths = depths.dropna(how='any').reset_index(drop=True) +1107 reference_depth = int(reference_depth) +1108 if len(depths) == 0 or len(densities) == 0: +1109 return None +1110 sorted_data = sorted(zip(depths, densities), key=lambda x: x[0]) +1111 sorted_depths, sorted_densities = zip(*sorted_data) +1112 # Determine reference density +1113 reference_density = None +1114 for i, depth in enumerate(sorted_depths): +1115 if depth >= reference_depth: +1116 if depth == reference_depth: +1117 reference_density = sorted_densities[i] +1118 reference_depth = sorted_depths[i] +1119 else: +1120 # Linear interpolation +1121 try: +1122 reference_density = sorted_densities[i - 1] + ( +1123 (sorted_densities[i] - sorted_densities[i - 1]) * ( +1124 (reference_depth - sorted_depths[i - 1]) / +1125 (sorted_depths[i] - sorted_depths[i - 1]))) +1126 except: +1127 raise CTDError("", +1128 f"Insufficient depth range to calculate MLD. " +1129 f"Maximum sample depth is "f"{depths.max()}, minimum is {depths.min()}") +1130 break +1131 if reference_density is None: +1132 return None +1133 # Find the depth where density exceeds the reference density by more than 0.05 kg/m³ +1134 for depth, density in zip(sorted_depths, sorted_densities): +1135 if density > reference_density + delta and depth >= reference_depth: +1136 return depth, reference_depth +1137 return None # If no depth meets the criterion 1138 -1139 Parameters -1140 ---------- -1141 densities : Series -1142 Series of densities -1143 depths : Series -1144 Series of depths corresponding to densities -1145 reference_depth : -1146 The depth at which to anchor the reference density -1147 delta : float, optional -1148 The difference in density which would indicate the MLD, defaults to 0.03 kg/m. -1149 -1150 Returns -1151 ------- -1152 tuple -1153 A tuple containing the calculated MLD and the reference depth used to calculate MLD. 
-1154 """ -1155 # Ensure input is pandas Series and drop NA values -1156 if isinstance(densities, pd.Series) and isinstance(depths, pd.Series): -1157 densities = densities.dropna().reset_index(drop=True) -1158 depths = depths.dropna().reset_index(drop=True) -1159 -1160 # Convert to numeric and ensure no NaNs remain -1161 densities = densities.apply(pd.to_numeric, errors='coerce') -1162 depths = depths.apply(pd.to_numeric, errors='coerce') -1163 densities = densities.dropna().reset_index(drop=True) -1164 depths = depths.dropna().reset_index(drop=True) -1165 if densities.empty or depths.empty: -1166 return None, None -1167 -1168 # Convert pandas Series to numpy arrays for NumPy operations -1169 densities = densities.to_numpy() -1170 depths = depths.to_numpy() -1171 -1172 # Remove duplicates by averaging densities at the same depth -1173 unique_depths, indices = np.unique(depths, return_inverse=True) -1174 average_densities = np.zeros_like(unique_depths) -1175 np.add.at(average_densities, indices, densities) -1176 counts = np.zeros_like(unique_depths) -1177 np.add.at(counts, indices, 1) -1178 average_densities /= counts -1179 -1180 # Apply LOESS smoothing -1181 lowess = statsmodels.api.nonparametric.lowess -1182 smoothed = lowess(average_densities, unique_depths, frac=0.1) -1183 smoothed_depths, smoothed_densities = zip(*smoothed) -1184 reference_density = np.interp(reference_depth, smoothed_depths, smoothed_densities) -1185 -1186 # Find the depth where density exceeds the reference density by more than 0.05 kg/m³ -1187 exceeding_indices = np.where(np.array(smoothed_densities) > reference_density + delta -1188 and np.array(smoothed_densities) > reference_depth)[0] -1189 if exceeding_indices.size > 0: -1190 mld_depth = smoothed_depths[exceeding_indices[0]] -1191 return mld_depth, reference_depth +1139 @staticmethod +1140 def calculate_mld_loess(densities, depths, reference_depth, delta = 0.03): +1141 """ +1142 Calculates the mixed layer depth (MLD) using LOESS smoothing to first smooth the density profile and +1143 then determine the depth where the smoothed density exceeds the reference density +1144 by a predefined amount which defaults to 0.03 kg/m³. +1145 +1146 Parameters +1147 ---------- +1148 densities : Series +1149 Series of densities +1150 depths : Series +1151 Series of depths corresponding to densities +1152 reference_depth : +1153 The depth at which to anchor the reference density +1154 delta : float, optional +1155 The difference in density which would indicate the MLD, defaults to 0.03 kg/m. +1156 +1157 Returns +1158 ------- +1159 tuple +1160 A tuple containing the calculated MLD and the reference depth used to calculate MLD. 
+1161 """ +1162 # Ensure input is pandas Series and drop NA values +1163 if isinstance(densities, pd.Series) and isinstance(depths, pd.Series): +1164 densities = densities.dropna().reset_index(drop=True) +1165 depths = depths.dropna().reset_index(drop=True) +1166 +1167 # Convert to numeric and ensure no NaNs remain +1168 densities = densities.apply(pd.to_numeric, errors='coerce') +1169 depths = depths.apply(pd.to_numeric, errors='coerce') +1170 densities = densities.dropna().reset_index(drop=True) +1171 depths = depths.dropna().reset_index(drop=True) +1172 if densities.empty or depths.empty: +1173 return None, None +1174 +1175 # Convert pandas Series to numpy arrays for NumPy operations +1176 densities = densities.to_numpy() +1177 depths = depths.to_numpy() +1178 +1179 # Remove duplicates by averaging densities at the same depth +1180 unique_depths, indices = np.unique(depths, return_inverse=True) +1181 average_densities = np.zeros_like(unique_depths) +1182 np.add.at(average_densities, indices, densities) +1183 counts = np.zeros_like(unique_depths) +1184 np.add.at(counts, indices, 1) +1185 average_densities /= counts +1186 +1187 # Apply LOESS smoothing +1188 lowess = statsmodels.api.nonparametric.lowess +1189 smoothed = lowess(average_densities, unique_depths, frac=0.1) +1190 smoothed_depths, smoothed_densities = zip(*smoothed) +1191 reference_density = np.interp(reference_depth, smoothed_depths, smoothed_densities) 1192 -1193 return None, None # If no depth meets the criterion -1194 -1195 @staticmethod -1196 def calculate_mean_surface_density(df, range_): -1197 """ -1198 Calculates the mean surface density from the CTD data, for a specified range or the entire dataset if the range is larger. +1193 # Find the depth where density exceeds the reference density by more than 0.05 kg/m³ +1194 exceeding_indices = np.where(np.array(smoothed_densities) > reference_density + delta +1195 and np.array(smoothed_densities) > reference_depth)[0] +1196 if exceeding_indices.size > 0: +1197 mld_depth = smoothed_depths[exceeding_indices[0]] +1198 return mld_depth, reference_depth 1199 -1200 Parameters -1201 ---------- -1202 df : DataFrame -1203 DataFrame containing density data. -1204 range_ : tuple or int -1205 Tuple indicating the (start, end) indices for the range of rows to be included in the calculation, -1206 or an integer indicating the number of rows from the start. -1207 -1208 Returns -1209 ------- -1210 float, None -1211 Mean density value of the specified sample or None if unable to calculate. -1212 """ -1213 min_depth = df.index.min() -1214 max_depth = df.index.max() -1215 -1216 if isinstance(range_, tuple): -1217 start, end = range_ -1218 -1219 # Adjust 'start' to ensure it is within the valid range -1220 start = max(start, min_depth) -1221 -1222 # Adjust 'end' to ensure it does not exceed the maximum depth value -1223 end = min(end, max_depth) -1224 -1225 # Ensure start is less than end -1226 if start <= end: -1227 return df.loc[start:end, 'density'].mean() -1228 else: -1229 return None -1230 -1231 elif isinstance(range_, int): -1232 # Use 'range_' as the number of rows from the start, adjust if it exceeds the DataFrame length -1233 range_ = min(range_, len(df)) -1234 return df.iloc[:range_, df.columns.get_loc('density')].mean() -1235 -1236 else: -1237 raise ValueError("Invalid range type. 
Must be tuple or int.") +1200 return None, None # If no depth meets the criterion +1201 +1202 @staticmethod +1203 def calculate_mean_surface_density(df, range_): +1204 """ +1205 Calculates the mean surface density from the CTD data, for a specified range or the entire dataset if the range is larger. +1206 +1207 Parameters +1208 ---------- +1209 df : DataFrame +1210 DataFrame containing density data. +1211 range_ : tuple or int +1212 Tuple indicating the (start, end) indices for the range of rows to be included in the calculation, +1213 or an integer indicating the number of rows from the start. +1214 +1215 Returns +1216 ------- +1217 float, None +1218 Mean density value of the specified sample or None if unable to calculate. +1219 """ +1220 min_depth = df.index.min() +1221 max_depth = df.index.max() +1222 +1223 if isinstance(range_, tuple): +1224 start, end = range_ +1225 +1226 # Adjust 'start' to ensure it is within the valid range +1227 start = max(start, min_depth) +1228 +1229 # Adjust 'end' to ensure it does not exceed the maximum depth value +1230 end = min(end, max_depth) +1231 +1232 # Ensure start is less than end +1233 if start <= end: +1234 return df.loc[start:end, 'density'].mean() +1235 else: +1236 return None +1237 +1238 elif isinstance(range_, int): +1239 # Use 'range_' as the number of rows from the start, adjust if it exceeds the DataFrame length +1240 range_ = min(range_, len(df)) +1241 return df.iloc[:range_, df.columns.get_loc('density')].mean() +1242 +1243 else: +1244 raise ValueError("Invalid range type. Must be tuple or int.") @@ -4834,56 +4865,56 @@ Returns - 887 @staticmethod -888 def gsw_infunnel(SA, CT, p): -889 """ -890 Check if the given Absolute Salinity (SA), Conservative Temperature (CT), -891 and pressure (p) are within the "oceanographic funnel" for the TEOS-10 75-term equation. -892 -893 Parameters -894 ---------- -895 SA : Series -896 Absolute Salinity in g/kg. -897 CT : Series -898 Conservative Temperature in degrees Celsius. -899 p : Series -900 Sea pressure in dbar (absolute pressure minus 10.1325 dbar). -901 -902 Returns -903 ------- -904 Series of bool -905 A boolean array where True indicates the values are inside the "oceanographic funnel". -906 """ -907 # Ensure all inputs are Series and aligned -908 if not (isinstance(SA, pd.Series) and isinstance(CT, pd.Series) and ( -909 isinstance(p, pd.Series) or np.isscalar(p))): -910 raise CTDError("", "SA, CT, and p must be pandas Series or p a scalar") -911 -912 if isinstance(p, pd.Series) and (SA.index.equals(CT.index) and SA.index.equals(p.index)) is False: -913 raise CTDError("", "Indices of SA, CT, and p must be aligned") -914 -915 if np.isscalar(p): -916 p = pd.Series(p, index=SA.index) -917 -918 # Define the funnel conditions -919 CT_freezing_p = gsw.CT_freezing(SA, p, 0) -920 CT_freezing_500 = gsw.CT_freezing(SA, 500, 0) + 894 @staticmethod +895 def gsw_infunnel(SA, CT, p): +896 """ +897 Check if the given Absolute Salinity (SA), Conservative Temperature (CT), +898 and pressure (p) are within the "oceanographic funnel" for the TEOS-10 75-term equation. +899 +900 Parameters +901 ---------- +902 SA : Series +903 Absolute Salinity in g/kg. +904 CT : Series +905 Conservative Temperature in degrees Celsius. +906 p : Series +907 Sea pressure in dbar (absolute pressure minus 10.1325 dbar). +908 +909 Returns +910 ------- +911 Series of bool +912 A boolean array where True indicates the values are inside the "oceanographic funnel". 
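# ----------------------------------------------------------------------------
# Editorial note on the exceedance test in calculate_mld_loess above: Python's
# `and` does not broadcast over arrays (NumPy raises a "truth value is
# ambiguous" error), and the second operand compares densities against the
# reference *depth* rather than depths. The intended elementwise mask,
# sketched with synthetic values and NumPy only:
import numpy as np

smoothed_depths = np.array([5.0, 10.0, 20.0, 30.0])
smoothed_densities = np.array([1026.00, 1026.01, 1026.02, 1026.10])
reference_depth, reference_density, delta = 10.0, 1026.01, 0.03
exceeding = np.where((smoothed_densities > reference_density + delta)
                     & (smoothed_depths >= reference_depth))[0]
print(smoothed_depths[exceeding[0]] if exceeding.size else None)  # 30.0
# ----------------------------------------------------------------------------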
+913 """ +914 # Ensure all inputs are Series and aligned +915 if not (isinstance(SA, pd.Series) and isinstance(CT, pd.Series) and ( +916 isinstance(p, pd.Series) or np.isscalar(p))): +917 raise CTDError("", "SA, CT, and p must be pandas Series or p a scalar") +918 +919 if isinstance(p, pd.Series) and (SA.index.equals(CT.index) and SA.index.equals(p.index)) is False: +920 raise CTDError("", "Indices of SA, CT, and p must be aligned") 921 -922 in_funnel = pd.Series(True, index=SA.index) # Default all to True -923 condition = ( -924 (p > 8000) | -925 (SA < 0) | (SA > 42) | -926 ((p < 500) & (CT < CT_freezing_p)) | -927 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | -928 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | -929 ((p >= 500) & (CT < CT_freezing_500)) | -930 ((p >= 6500) & (SA < 30)) | -931 ((p >= 6500) & (CT > 10.0)) | -932 SA.isna() | CT.isna() | p.isna() -933 ) -934 in_funnel[condition] = False -935 -936 return in_funnel +922 if np.isscalar(p): +923 p = pd.Series(p, index=SA.index) +924 +925 # Define the funnel conditions +926 CT_freezing_p = gsw.CT_freezing(SA, p, 0) +927 CT_freezing_500 = gsw.CT_freezing(SA, 500, 0) +928 +929 in_funnel = pd.Series(True, index=SA.index) # Default all to True +930 condition = ( +931 (p > 8000) | +932 (SA < 0) | (SA > 42) | +933 ((p < 500) & (CT < CT_freezing_p)) | +934 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | +935 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | +936 ((p >= 500) & (CT < CT_freezing_500)) | +937 ((p >= 6500) & (SA < 30)) | +938 ((p >= 6500) & (CT > 10.0)) | +939 SA.isna() | CT.isna() | p.isna() +940 ) +941 in_funnel[condition] = False +942 +943 return in_funnel @@ -4922,65 +4953,65 @@ Returns - 938 @staticmethod -939 def calculate_and_drop_salinity_spikes(df): -940 """ -941 Calculates and removes salinity spikes from the CTD data based on predefined thresholds for acceptable -942 changes in salinity with depth. 
-943 -944 Parameters -945 ---------- -946 df : DataFrame -947 DataFrame containing depth and salinity data -948 -949 Returns -950 ------- -951 DataFrame -952 DataFrame after removing salinity spikes -953 """ -954 acceptable_delta_salinity_per_depth = [ -955 (0.0005, 0.001), -956 (0.005, 0.01), -957 (0.05, 0.1), -958 (0.5, 1) -959 ] -960 if df.empty: -961 return None -962 # Convert 'depth_00' and 'salinity_00' to numeric, coercing errors -963 df['depth_00'] = pd.to_numeric(df['depth_00'], errors='coerce') -964 df['salinity_00'] = pd.to_numeric(df['salinity_00'], errors='coerce') -965 -966 # Drop any rows where either 'depth_00' or 'salinity_00' is NaN -967 df = df.dropna(subset=['depth_00', 'salinity_00']) -968 -969 # Check if there is enough depth range to calculate -970 min_depth = df['depth_00'].min() -971 max_depth = df['depth_00'].max() -972 if min_depth == max_depth: -973 print("Insufficient depth range to calculate.") -974 return df -975 -976 def recursively_drop(df, depth_range, acceptable_delta, i): -977 try: -978 num_points = int((max_depth - min_depth) / depth_range) # Calculate number of points -979 except: -980 print("Error in calculating number of points.") -981 return df -982 ranges = np.linspace(min_depth, max_depth, num=num_points) -983 -984 # Group by these ranges -985 groups = df.groupby(pd.cut(df['depth_00'], ranges), observed=True) -986 -987 # Calculate the min and max salinity for each range and filter ranges where the difference is <= 1 -988 filtered_groups = groups.filter( -989 lambda x: abs(x['salinity_00'].max() - x['salinity_00'].min()) <= acceptable_delta) -990 # Get the indices of the filtered groups -991 filtered_indices = filtered_groups.index -992 return filtered_groups -993 -994 for i, deltas in enumerate(acceptable_delta_salinity_per_depth): -995 df = recursively_drop(df, deltas[0], deltas[1], i) -996 return df + 945 @staticmethod + 946 def calculate_and_drop_salinity_spikes(df): + 947 """ + 948 Calculates and removes salinity spikes from the CTD data based on predefined thresholds for acceptable + 949 changes in salinity with depth. 
+ 950 + 951 Parameters + 952 ---------- + 953 df : DataFrame + 954 DataFrame containing depth and salinity data + 955 + 956 Returns + 957 ------- + 958 DataFrame + 959 DataFrame after removing salinity spikes + 960 """ + 961 acceptable_delta_salinity_per_depth = [ + 962 (0.0005, 0.001), + 963 (0.005, 0.01), + 964 (0.05, 0.1), + 965 (0.5, 1) + 966 ] + 967 if df.empty: + 968 return None + 969 # Convert 'depth_00' and 'salinity_00' to numeric, coercing errors + 970 df['depth_00'] = pd.to_numeric(df['depth_00'], errors='coerce') + 971 df['salinity_00'] = pd.to_numeric(df['salinity_00'], errors='coerce') + 972 + 973 # Drop any rows where either 'depth_00' or 'salinity_00' is NaN + 974 df = df.dropna(subset=['depth_00', 'salinity_00']) + 975 + 976 # Check if there is enough depth range to calculate + 977 min_depth = df['depth_00'].min() + 978 max_depth = df['depth_00'].max() + 979 if min_depth == max_depth: + 980 print("Insufficient depth range to calculate.") + 981 return df + 982 + 983 def recursively_drop(df, depth_range, acceptable_delta, i): + 984 try: + 985 num_points = int((max_depth - min_depth) / depth_range) # Calculate number of points + 986 except: + 987 print("Error in calculating number of points.") + 988 return df + 989 ranges = np.linspace(min_depth, max_depth, num=num_points) + 990 + 991 # Group by these ranges + 992 groups = df.groupby(pd.cut(df['depth_00'], ranges), observed=True) + 993 + 994 # Calculate the min and max salinity for each range and filter ranges where the difference is <= 1 + 995 filtered_groups = groups.filter( + 996 lambda x: abs(x['salinity_00'].max() - x['salinity_00'].min()) <= acceptable_delta) + 997 # Get the indices of the filtered groups + 998 filtered_indices = filtered_groups.index + 999 return filtered_groups +1000 +1001 for i, deltas in enumerate(acceptable_delta_salinity_per_depth): +1002 df = recursively_drop(df, deltas[0], deltas[1], i) +1003 return df @@ -5015,32 +5046,32 @@ Returns - 998 @staticmethod - 999 def calculate_overturns(ctd_array): -1000 """ -1001 Calculates density overturns in the CTD data where denser water lies above lighter water with density -1002 difference of at least 0.05 kg/m³, which may indicate mixing or other dynamic processes. -1003 -1004 Parameters -1005 ---------- -1006 ctd_array : DataFrame -1007 DataFrame containing depth, density, and timestamp data -1008 -1009 Returns -1010 ------- -1011 DataFrame -1012 DataFrame with identified density overturns -1013 """ -1014 # Sort DataFrame by depth in ascending order -1015 ctd_array = ctd_array.sort_values(by='depth_00', ascending=True) -1016 # Calculate density change and identify overturns -1017 ctd_array['density_change'] = ctd_array[ -1018 'density'].diff() # Difference in density between consecutive measurements -1019 ctd_array['overturn'] = ctd_array['density_change'] < -0.05 -1020 ctd_array = ctd_array.sort_values(by='timestamp', ascending=True) -1021 if 'density_change' in ctd_array.columns: -1022 ctd_array = ctd_array.drop('density_change', axis=1) -1023 return ctd_array + 1005 @staticmethod +1006 def calculate_overturns(ctd_array): +1007 """ +1008 Calculates density overturns in the CTD data where denser water lies above lighter water with density +1009 difference of at least 0.05 kg/m³, which may indicate mixing or other dynamic processes. 
+1010 +1011 Parameters +1012 ---------- +1013 ctd_array : DataFrame +1014 DataFrame containing depth, density, and timestamp data +1015 +1016 Returns +1017 ------- +1018 DataFrame +1019 DataFrame with identified density overturns +1020 """ +1021 # Sort DataFrame by depth in ascending order +1022 ctd_array = ctd_array.sort_values(by='depth_00', ascending=True) +1023 # Calculate density change and identify overturns +1024 ctd_array['density_change'] = ctd_array[ +1025 'density'].diff() # Difference in density between consecutive measurements +1026 ctd_array['overturn'] = ctd_array['density_change'] < -0.05 +1027 ctd_array = ctd_array.sort_values(by='timestamp', ascending=True) +1028 if 'density_change' in ctd_array.columns: +1029 ctd_array = ctd_array.drop('density_change', axis=1) +1030 return ctd_array @@ -5075,30 +5106,30 @@ Returns - 1025 @staticmethod -1026 def calculate_absolute_density(ctd_array): -1027 """ -1028 Calculates absolute density from the CTD data using the TEOS-10 equations, -1029 ensuring all data points are within the valid oceanographic funnel. -1030 -1031 Parameters -1032 ---------- -1033 ctd_array : DataFrame -1034 DataFrame containing salinity, temperature, and pressure data -1035 -1036 Returns -1037 ------- -1038 Series -1039 Series with calculated absolute density -1040 """ -1041 SA = ctd_array['salinity_00'] -1042 t = ctd_array['temperature_00'] -1043 p = ctd_array['pressure_00'] -1044 CT = gsw.CT_from_t(SA, t, p) -1045 if Calculate.gsw_infunnel(SA, CT, p).all(): -1046 return gsw.density.rho_t_exact(SA, t, p) -1047 else: -1048 raise CTDError("", "Sample not in funnel, could not calculate density.") + 1032 @staticmethod +1033 def calculate_absolute_density(ctd_array): +1034 """ +1035 Calculates absolute density from the CTD data using the TEOS-10 equations, +1036 ensuring all data points are within the valid oceanographic funnel. +1037 +1038 Parameters +1039 ---------- +1040 ctd_array : DataFrame +1041 DataFrame containing salinity, temperature, and pressure data +1042 +1043 Returns +1044 ------- +1045 Series +1046 Series with calculated absolute density +1047 """ +1048 SA = ctd_array['salinity_00'] +1049 t = ctd_array['temperature_00'] +1050 p = ctd_array['pressure_00'] +1051 CT = gsw.CT_from_t(SA, t, p) +1052 if Calculate.gsw_infunnel(SA, CT, p).all(): +1053 return gsw.density.rho_t_exact(SA, t, p) +1054 else: +1055 raise CTDError("", "Sample not in funnel, could not calculate density.") @@ -5133,27 +5164,27 @@ Returns - 1050 @staticmethod -1051 def calculate_absolute_salinity(ctd_array): -1052 """ -1053 Calculates absolute salinity from practical salinity, pressure, -1054 and geographical coordinates using the TEOS-10 salinity conversion formulas. -1055 -1056 Parameters -1057 ---------- -1058 ctd_array : DataFrame -1059 DataFrame containing practical salinity, pressure, longitude, and latitude data -1060 -1061 Returns -1062 ------- -1063 Series -1064 Series with calculated absolute salinity -1065 """ -1066 SP = ctd_array['salinity_00'] -1067 p = ctd_array['pressure_00'] -1068 lon = ctd_array['longitude'] -1069 lat = ctd_array['latitude'] -1070 return gsw.conversions.SA_from_SP(SP, p, lon, lat) + 1057 @staticmethod +1058 def calculate_absolute_salinity(ctd_array): +1059 """ +1060 Calculates absolute salinity from practical salinity, pressure, +1061 and geographical coordinates using the TEOS-10 salinity conversion formulas. 
+1062 +1063 Parameters +1064 ---------- +1065 ctd_array : DataFrame +1066 DataFrame containing practical salinity, pressure, longitude, and latitude data +1067 +1068 Returns +1069 ------- +1070 Series +1071 Series with calculated absolute salinity +1072 """ +1073 SP = ctd_array['salinity_00'] +1074 p = ctd_array['pressure_00'] +1075 lon = ctd_array['longitude'] +1076 lat = ctd_array['latitude'] +1077 return gsw.conversions.SA_from_SP(SP, p, lon, lat) @@ -5188,65 +5219,65 @@ Returns - 1072 @staticmethod -1073 def calculate_mld(densities, depths, reference_depth, delta = 0.03): -1074 """ -1075 Calculates the mixed layer depth (MLD) using the density threshold method. -1076 MLD is the depth at which the density exceeds the reference density -1077 by a predefined amount delta, which defaults to (0.03 kg/m³). -1078 -1079 Parameters -1080 ---------- -1081 densities : Series -1082 Series of densities -1083 depths : Series -1084 Series of depths corresponding to densities -1085 reference_depth : float -1086 The depth at which to anchor the reference density -1087 delta : float, optional -1088 The difference in density which would indicate the MLD, defaults to 0.03 kg/m. -1089 -1090 Returns -1091 ------- -1092 tuple -1093 A tuple containing the calculated MLD and the reference depth used to calculate MLD. -1094 """ -1095 # Convert to numeric and ensure no NaNs remain -1096 densities = densities.apply(pd.to_numeric, errors='coerce') -1097 depths = depths.apply(pd.to_numeric, errors='coerce') -1098 densities = densities.dropna(how='any').reset_index(drop=True) -1099 depths = depths.dropna(how='any').reset_index(drop=True) -1100 reference_depth = int(reference_depth) -1101 if len(depths) == 0 or len(densities) == 0: -1102 return None -1103 sorted_data = sorted(zip(depths, densities), key=lambda x: x[0]) -1104 sorted_depths, sorted_densities = zip(*sorted_data) -1105 # Determine reference density -1106 reference_density = None -1107 for i, depth in enumerate(sorted_depths): -1108 if depth >= reference_depth: -1109 if depth == reference_depth: -1110 reference_density = sorted_densities[i] -1111 reference_depth = sorted_depths[i] -1112 else: -1113 # Linear interpolation -1114 try: -1115 reference_density = sorted_densities[i - 1] + ( -1116 (sorted_densities[i] - sorted_densities[i - 1]) * ( -1117 (reference_depth - sorted_depths[i - 1]) / -1118 (sorted_depths[i] - sorted_depths[i - 1]))) -1119 except: -1120 raise CTDError("", -1121 f"Insufficient depth range to calculate MLD. " -1122 f"Maximum sample depth is "f"{depths.max()}, minimum is {depths.min()}") -1123 break -1124 if reference_density is None: -1125 return None -1126 # Find the depth where density exceeds the reference density by more than 0.05 kg/m³ -1127 for depth, density in zip(sorted_depths, sorted_densities): -1128 if density > reference_density + delta and depth >= reference_depth: -1129 return depth, reference_depth -1130 return None # If no depth meets the criterion + 1079 @staticmethod +1080 def calculate_mld(densities, depths, reference_depth, delta = 0.03): +1081 """ +1082 Calculates the mixed layer depth (MLD) using the density threshold method. +1083 MLD is the depth at which the density exceeds the reference density +1084 by a predefined amount delta, which defaults to (0.03 kg/m³). 
+1085 
+1086 Parameters
+1087 ----------
+1088 densities : Series
+1089 Series of densities
+1090 depths : Series
+1091 Series of depths corresponding to densities
+1092 reference_depth : float
+1093 The depth at which to anchor the reference density
+1094 delta : float, optional
+1095 The difference in density which would indicate the MLD, defaults to 0.03 kg/m³.
+1096 
+1097 Returns
+1098 -------
+1099 tuple
+1100 A tuple containing the calculated MLD and the reference depth used to calculate MLD.
+1101 """
+1102 # Convert to numeric and ensure no NaNs remain
+1103 densities = densities.apply(pd.to_numeric, errors='coerce')
+1104 depths = depths.apply(pd.to_numeric, errors='coerce')
+1105 densities = densities.dropna(how='any').reset_index(drop=True)
+1106 depths = depths.dropna(how='any').reset_index(drop=True)
+1107 reference_depth = int(reference_depth)
+1108 if len(depths) == 0 or len(densities) == 0:
+1109 return None
+1110 sorted_data = sorted(zip(depths, densities), key=lambda x: x[0])
+1111 sorted_depths, sorted_densities = zip(*sorted_data)
+1112 # Determine reference density
+1113 reference_density = None
+1114 for i, depth in enumerate(sorted_depths):
+1115 if depth >= reference_depth:
+1116 if depth == reference_depth:
+1117 reference_density = sorted_densities[i]
+1118 reference_depth = sorted_depths[i]
+1119 else:
+1120 # Linear interpolation
+1121 try:
+1122 reference_density = sorted_densities[i - 1] + (
+1123 (sorted_densities[i] - sorted_densities[i - 1]) * (
+1124 (reference_depth - sorted_depths[i - 1]) /
+1125 (sorted_depths[i] - sorted_depths[i - 1])))
+1126 except Exception:
+1127 raise CTDError("",
+1128 f"Insufficient depth range to calculate MLD. "
+1129 f"Maximum sample depth is {depths.max()}, minimum is {depths.min()}")
+1130 break
+1131 if reference_density is None:
+1132 return None
+1133 # Find the first depth where density exceeds the reference density by more than delta
+1134 for depth, density in zip(sorted_depths, sorted_densities):
+1135 if density > reference_density + delta and depth >= reference_depth:
+1136 return depth, reference_depth
+1137 return None # If no depth meets the criterion
@@ -5264,7 +5295,7 @@ Parameters
reference_depth (float):
The depth at which to anchor the reference density
delta (float, optional):
-The difference in density which would indicate the MLD, defaults to 0.03 kg/m.
+The difference in density which would indicate the MLD, defaults to 0.03 kg/m³.
Returns
@@ -5288,68 +5319,68 @@ Returns
- 1132 @staticmethod
-1133 def calculate_mld_loess(densities, depths, reference_depth, delta = 0.03):
-1134 """
-1135 Calculates the mixed layer depth (MLD) using LOESS smoothing to first smooth the density profile and
-1136 then determine the depth where the smoothed density exceeds the reference density
-1137 by a predefined amount which defaults to 0.03 kg/m³.
-1138 
-1139 Parameters
-1140 ----------
-1141 densities : Series
-1142 Series of densities
-1143 depths : Series
-1144 Series of depths corresponding to densities
-1145 reference_depth :
-1146 The depth at which to anchor the reference density
-1147 delta : float, optional
-1148 The difference in density which would indicate the MLD, defaults to 0.03 kg/m.
-1149 
-1150 Returns
-1151 -------
-1152 tuple
-1153 A tuple containing the calculated MLD and the reference depth used to calculate MLD. 
-1154 """
-1155 # Ensure input is pandas Series and drop NA values
-1156 if isinstance(densities, pd.Series) and isinstance(depths, pd.Series):
-1157 densities = densities.dropna().reset_index(drop=True)
-1158 depths = depths.dropna().reset_index(drop=True)
-1159 
-1160 # Convert to numeric and ensure no NaNs remain
-1161 densities = densities.apply(pd.to_numeric, errors='coerce')
-1162 depths = depths.apply(pd.to_numeric, errors='coerce')
-1163 densities = densities.dropna().reset_index(drop=True)
-1164 depths = depths.dropna().reset_index(drop=True)
-1165 if densities.empty or depths.empty:
-1166 return None, None
-1167 
-1168 # Convert pandas Series to numpy arrays for NumPy operations
-1169 densities = densities.to_numpy()
-1170 depths = depths.to_numpy()
-1171 
-1172 # Remove duplicates by averaging densities at the same depth
-1173 unique_depths, indices = np.unique(depths, return_inverse=True)
-1174 average_densities = np.zeros_like(unique_depths)
-1175 np.add.at(average_densities, indices, densities)
-1176 counts = np.zeros_like(unique_depths)
-1177 np.add.at(counts, indices, 1)
-1178 average_densities /= counts
-1179 
-1180 # Apply LOESS smoothing
-1181 lowess = statsmodels.api.nonparametric.lowess
-1182 smoothed = lowess(average_densities, unique_depths, frac=0.1)
-1183 smoothed_depths, smoothed_densities = zip(*smoothed)
-1184 reference_density = np.interp(reference_depth, smoothed_depths, smoothed_densities)
-1185 
-1186 # Find the depth where density exceeds the reference density by more than 0.05 kg/m³
-1187 exceeding_indices = np.where(np.array(smoothed_densities) > reference_density + delta
-1188 and np.array(smoothed_densities) > reference_depth)[0]
-1189 if exceeding_indices.size > 0:
-1190 mld_depth = smoothed_depths[exceeding_indices[0]]
-1191 return mld_depth, reference_depth
+ 1139 @staticmethod
+1140 def calculate_mld_loess(densities, depths, reference_depth, delta = 0.03):
+1141 """
+1142 Calculates the mixed layer depth (MLD) using LOESS smoothing to first smooth the density profile and
+1143 then determine the depth where the smoothed density exceeds the reference density
+1144 by a predefined amount which defaults to 0.03 kg/m³.
+1145 
+1146 Parameters
+1147 ----------
+1148 densities : Series
+1149 Series of densities
+1150 depths : Series
+1151 Series of depths corresponding to densities
+1152 reference_depth : float
+1153 The depth at which to anchor the reference density
+1154 delta : float, optional
+1155 The difference in density which would indicate the MLD, defaults to 0.03 kg/m³.
+1156 
+1157 Returns
+1158 -------
+1159 tuple
+1160 A tuple containing the calculated MLD and the reference depth used to calculate MLD. 
+1161 """
+1162 # Ensure input is pandas Series and drop NA values
+1163 if isinstance(densities, pd.Series) and isinstance(depths, pd.Series):
+1164 densities = densities.dropna().reset_index(drop=True)
+1165 depths = depths.dropna().reset_index(drop=True)
+1166 
+1167 # Convert to numeric and ensure no NaNs remain
+1168 densities = densities.apply(pd.to_numeric, errors='coerce')
+1169 depths = depths.apply(pd.to_numeric, errors='coerce')
+1170 densities = densities.dropna().reset_index(drop=True)
+1171 depths = depths.dropna().reset_index(drop=True)
+1172 if densities.empty or depths.empty:
+1173 return None, None
+1174 
+1175 # Convert pandas Series to numpy arrays for NumPy operations
+1176 densities = densities.to_numpy()
+1177 depths = depths.to_numpy()
+1178 
+1179 # Remove duplicates by averaging densities at the same depth
+1180 unique_depths, indices = np.unique(depths, return_inverse=True)
+1181 average_densities = np.zeros_like(unique_depths)
+1182 np.add.at(average_densities, indices, densities)
+1183 counts = np.zeros_like(unique_depths)
+1184 np.add.at(counts, indices, 1)
+1185 average_densities /= counts
+1186 
+1187 # Apply LOESS smoothing
+1188 lowess = statsmodels.api.nonparametric.lowess
+1189 smoothed = lowess(average_densities, unique_depths, frac=0.1)
+1190 smoothed_depths, smoothed_densities = zip(*smoothed)
+1191 reference_density = np.interp(reference_depth, smoothed_depths, smoothed_densities)
1192 
-1193 return None, None # If no depth meets the criterion
+1193 # Find the first depth where the smoothed density exceeds the reference density by more than delta
+1194 exceeding_indices = np.where((np.array(smoothed_densities) > reference_density + delta)
+1195 & (np.array(smoothed_depths) >= reference_depth))[0]
+1196 if exceeding_indices.size > 0:
+1197 mld_depth = smoothed_depths[exceeding_indices[0]]
+1198 return mld_depth, reference_depth
+1199 
+1200 return None, None # If no depth meets the criterion
@@ -5390,49 +5421,49 @@ Returns
- 1195 @staticmethod
-1196 def calculate_mean_surface_density(df, range_):
-1197 """
-1198 Calculates the mean surface density from the CTD data, for a specified range or the entire dataset if the range is larger.
-1199 
-1200 Parameters
-1201 ----------
-1202 df : DataFrame
-1203 DataFrame containing density data.
-1204 range_ : tuple or int
-1205 Tuple indicating the (start, end) indices for the range of rows to be included in the calculation,
-1206 or an integer indicating the number of rows from the start.
-1207 
-1208 Returns
-1209 -------
-1210 float, None
-1211 Mean density value of the specified sample or None if unable to calculate.
-1212 """
-1213 min_depth = df.index.min()
-1214 max_depth = df.index.max()
-1215 
-1216 if isinstance(range_, tuple):
-1217 start, end = range_
-1218 
-1219 # Adjust 'start' to ensure it is within the valid range
-1220 start = max(start, min_depth)
-1221 
-1222 # Adjust 'end' to ensure it does not exceed the maximum depth value
-1223 end = min(end, max_depth)
-1224 
-1225 # Ensure start is less than end
-1226 if start <= end:
-1227 return df.loc[start:end, 'density'].mean()
-1228 else:
-1229 return None
-1230 
-1231 elif isinstance(range_, int):
-1232 # Use 'range_' as the number of rows from the start, adjust if it exceeds the DataFrame length
-1233 range_ = min(range_, len(df))
-1234 return df.iloc[:range_, df.columns.get_loc('density')].mean()
-1235 
-1236 else:
-1237 raise ValueError("Invalid range type. 
Must be tuple or int.")
+ 1202 @staticmethod
+1203 def calculate_mean_surface_density(df, range_):
+1204 """
+1205 Calculates the mean surface density from the CTD data over a specified range, or over the entire dataset if the range exceeds it.
+1206 
+1207 Parameters
+1208 ----------
+1209 df : DataFrame
+1210 DataFrame containing density data.
+1211 range_ : tuple or int
+1212 Tuple indicating the (start, end) indices for the range of rows to be included in the calculation,
+1213 or an integer indicating the number of rows from the start.
+1214 
+1215 Returns
+1216 -------
+1217 float or None
+1218 Mean density value of the specified sample or None if unable to calculate.
+1219 """
+1220 min_depth = df.index.min()
+1221 max_depth = df.index.max()
+1222 
+1223 if isinstance(range_, tuple):
+1224 start, end = range_
+1225 
+1226 # Adjust 'start' to ensure it is within the valid range
+1227 start = max(start, min_depth)
+1228 
+1229 # Adjust 'end' to ensure it does not exceed the maximum depth value
+1230 end = min(end, max_depth)
+1231 
+1232 # Ensure start does not exceed end
+1233 if start <= end:
+1234 return df.loc[start:end, 'density'].mean()
+1235 else:
+1236 return None
+1237 
+1238 elif isinstance(range_, int):
+1239 # Use 'range_' as the number of rows from the start, adjust if it exceeds the DataFrame length
+1240 range_ = min(range_, len(df))
+1241 return df.iloc[:range_, df.columns.get_loc('density')].mean()
+1242 
+1243 else:
+1244 raise ValueError("Invalid range type. Must be tuple or int.")
@@ -5469,20 +5500,20 @@ Returns
- 1240class CTDError(Exception):
-1241 """
-1242 Exception raised for CTD related errors.
-1243 
-1244 Parameters
-1245 ----------
-1246 filename: input dataset which caused the error
-1247 message: message -- explanation of the error
-1248 """
-1249 
-1250 def __init__(self, filename, message=" Unknown, check to make sure your mastersheet is in your current directory."):
-1251 self.filename = filename
-1252 self.message = message
-1253 super().__init__(self.message)
+ 1247class CTDError(Exception):
+1248 """
+1249 Exception raised for CTD related errors. 
+1250 +1251 Parameters +1252 ---------- +1253 filename: input dataset which caused the error +1254 message: message -- explanation of the error +1255 """ +1256 +1257 def __init__(self, filename, message=" Unknown, check to make sure your mastersheet is in your current directory."): +1258 self.filename = filename +1259 self.message = message +1260 super().__init__(self.message) @@ -5507,10 +5538,10 @@ Parameters - 1250 def __init__(self, filename, message=" Unknown, check to make sure your mastersheet is in your current directory."): -1251 self.filename = filename -1252 self.message = message -1253 super().__init__(self.message) + 1257 def __init__(self, filename, message=" Unknown, check to make sure your mastersheet is in your current directory."): +1258 self.filename = filename +1259 self.message = message +1260 super().__init__(self.message) @@ -5562,31 +5593,31 @@ Inherited Members - 1256def run_default(plot=False): -1257 _reset_file_environment() -1258 CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx") -1259 rsk_files_list = get_rsk_filenames_in_dir(_get_cwd()) -1260 for file in rsk_files_list: -1261 try: -1262 my_data = CTD(file) -1263 my_data.add_filename_to_table() -1264 my_data.save_to_csv("output.csv") -1265 my_data.add_location_to_table() -1266 my_data.remove_non_positive_samples() -1267 my_data.clean("practicalsalinity", 'salinitydiff') -1268 my_data.add_absolute_salinity() -1269 my_data.add_density() -1270 my_data.add_overturns() -1271 my_data.add_mld(1) -1272 my_data.add_mld(5) -1273 my_data.save_to_csv("outputclean.csv") -1274 if plot: -1275 my_data.plot_depth_density_salinity_mld_scatter() -1276 my_data.plot_depth_temperature_scatter() -1277 my_data.plot_depth_salinity_density_mld_line() -1278 except Exception as e: -1279 print(f"Error processing file: '{file}' {e}") -1280 continue + 1263def run_default(plot=False): +1264 _reset_file_environment() +1265 CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx") +1266 rsk_files_list = get_rsk_filenames_in_dir(_get_cwd()) +1267 for file in rsk_files_list: +1268 try: +1269 my_data = CTD(file) +1270 my_data.add_filename_to_table() +1271 my_data.save_to_csv("output.csv") +1272 my_data.add_location_to_table() +1273 my_data.remove_non_positive_samples() +1274 my_data.clean("practicalsalinity", 'salinitydiff') +1275 my_data.add_absolute_salinity() +1276 my_data.add_density() +1277 my_data.add_overturns() +1278 my_data.add_mld(1) +1279 my_data.add_mld(5) +1280 my_data.save_to_csv("outputclean.csv") +1281 if plot: +1282 my_data.plot_depth_density_salinity_mld_scatter() +1283 my_data.plot_depth_temperature_scatter() +1284 my_data.plot_depth_salinity_density_mld_line() +1285 except Exception as e: +1286 print(f"Error processing file: '{file}' {e}") +1287 continue @@ -5604,18 +5635,18 @@ Inherited Members - 1283def merge_all_in_folder(): -1284 CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx") -1285 rsk_files_list = get_rsk_filenames_in_dir(_get_cwd()) -1286 for file in rsk_files_list: -1287 try: -1288 my_data = CTD(file) -1289 my_data.add_filename_to_table() -1290 my_data.add_location_to_table() -1291 my_data.save_to_csv("output.csv") -1292 except Exception as e: -1293 print(e) -1294 continue + 1290def merge_all_in_folder(): +1291 CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx") +1292 rsk_files_list = get_rsk_filenames_in_dir(_get_cwd()) +1293 for file in rsk_files_list: +1294 try: +1295 my_data = CTD(file) +1296 
my_data.add_filename_to_table()
+1297 my_data.add_location_to_table()
+1298 my_data.save_to_csv("output.csv")
+1299 except Exception as e:
+1300 print(e)
+1301 continue
@@ -5633,19 +5664,19 @@ Inherited Members
- 1296def get_rsk_filenames_in_dir(working_directory):
-1297 rsk_files_list = []
-1298 rsk_filenames_no_path = []
-1299 for filename in os.listdir(working_directory):
-1300 if filename.endswith('.rsk'):
-1301 for filepath in rsk_files_list:
-1302 filename_no_path = ('_'.join(filepath.split("/")[-1].split("_")[0:3]).split('.rsk')[0])
-1303 if filename_no_path in rsk_filenames_no_path:
-1304 continue
-1305 rsk_filenames_no_path.append(filename_no_path)
-1306 file_path = os.path.join(working_directory, filename)
-1307 rsk_files_list.append(file_path)
-1308 return rsk_files_list
+ 1303def get_rsk_filenames_in_dir(working_directory):
+1304 rsk_files_list = []
+1305 rsk_filenames_no_path = []
+1306 for filename in os.listdir(working_directory):
+1307 if filename.endswith('.rsk'):
+1308 # Derive the deduplication key from this file's name and skip repeats
+1309 filename_no_path = ('_'.join(filename.split("_")[0:3]).split('.rsk')[0])
+1310 if filename_no_path in rsk_filenames_no_path:
+1311 continue
+1312 rsk_filenames_no_path.append(filename_no_path)
+1313 file_path = os.path.join(working_directory, filename)
+1314 rsk_files_list.append(file_path)
+1315 return rsk_files_list
@@ -5663,64 +5694,64 @@ Inherited Members
- 1349def main():
-1350 run_default(True)
-1351 if len(sys.argv) < 2:
-1352 print("Usage: ctdfjorder <command> [arguments]")
-1353 print("Commands:")
-1354 print(" process <file> Process a single RSK file")
-1355 print(" merge Merge all RSK files in the current folder")
-1356 print(" default Run the default processing pipeline")
-1357 sys.exit(1)
-1358 
-1359 command = sys.argv[1]
-1360 
-1361 if command == "process":
-1362 if len(sys.argv) < 3:
-1363 print("Usage: ctdfjorder process <file>")
-1364 sys.exit(1)
+ 1356def main():
+1357 """Entry point for the ctdfjorder command-line interface."""
+1358 if len(sys.argv) < 2:
+1359 print("Usage: ctdfjorder <command> [arguments]")
+1360 print("Commands:")
+1361 print(" process <file> Process a single RSK file")
+1362 print(" merge Merge all RSK files in the current folder")
+1363 print(" default Run the default processing pipeline")
+1364 sys.exit(1)
1365 
-1366 file_path = sys.argv[2]
-1367 try:
-1368 ctd = CTD(file_path)
-1369 ctd.add_filename_to_table()
-1370 ctd.save_to_csv("output.csv")
-1371 ctd.add_location_to_table()
-1372 ctd.remove_non_positive_samples()
-1373 ctd.clean("practicalsalinity", 'salinitydiff')
-1374 ctd.add_absolute_salinity()
-1375 ctd.add_density()
-1376 ctd.add_overturns()
-1377 ctd.add_mld(0)
-1378 ctd.add_mld(10)
-1379 ctd.save_to_csv("outputclean.csv")
-1380 print("Processing completed successfully.")
-1381 except Exception as e:
-1382 print(f"Error processing file: '{file_path}' {e}")
-1383 
-1384 elif command == "merge":
-1385 merge_all_in_folder()
-1386 print("Merging completed successfully.")
-1387 
-1388 elif command == "default":
-1389 run_default()
-1390 print("Default processing completed successfully.")
-1391 
-1392 elif command == "defaultplotall":
-1393 run_default(True)
-1394 print("Default processing completed successfully.")
-1395 
-1396 else:
-1397 print(f"Unknown command: {command}")
-1398 print("Usage: ctdfjorder <command> [arguments]")
-1399 print("Commands:")
-1400 print(" process <file> Process a single RSK file")
-1401 print(" merge Merge all RSK files in the current folder")
-1402 print(" default Run the default processing pipeline")
-1403 print(" 
defaultplotall Run the default processing pipeline and create plots") -1404 print("CWD:") -1405 print(_get_cwd()) -1406 sys.exit(1) +1366 command = sys.argv[1] +1367 +1368 if command == "process": +1369 if len(sys.argv) < 3: +1370 print("Usage: ctdfjorder process <file>") +1371 sys.exit(1) +1372 +1373 file_path = sys.argv[2] +1374 try: +1375 ctd = CTD(file_path) +1376 ctd.add_filename_to_table() +1377 ctd.save_to_csv("output.csv") +1378 ctd.add_location_to_table() +1379 ctd.remove_non_positive_samples() +1380 ctd.clean("practicalsalinity", 'salinitydiff') +1381 ctd.add_absolute_salinity() +1382 ctd.add_density() +1383 ctd.add_overturns() +1384 ctd.add_mld(0) +1385 ctd.add_mld(10) +1386 ctd.save_to_csv("outputclean.csv") +1387 print("Processing completed successfully.") +1388 except Exception as e: +1389 print(f"Error processing file: '{file_path}' {e}") +1390 +1391 elif command == "merge": +1392 merge_all_in_folder() +1393 print("Merging completed successfully.") +1394 +1395 elif command == "default": +1396 run_default() +1397 print("Default processing completed successfully.") +1398 +1399 elif command == "defaultplotall": +1400 run_default(True) +1401 print("Default processing completed successfully.") +1402 +1403 else: +1404 print(f"Unknown command: {command}") +1405 print("Usage: ctdfjorder <command> [arguments]") +1406 print("Commands:") +1407 print(" process <file> Process a single RSK file") +1408 print(" merge Merge all RSK files in the current folder") +1409 print(" default Run the default processing pipeline") +1410 print(" defaultplotall Run the default processing pipeline and create plots") +1411 print("CWD:") +1412 print(_get_cwd()) +1413 sys.exit(1) @@ -5728,5 +5759,186 @@ Inherited Members -
72 def __init__(self, rskfilepath): -73 """ -74 Initialize a new CTD object. -75 -76 Parameters -77 ---------- -78 rskfilepath : str -79 The file path to the RSK file. -80 """ -81 self._rsk = RSK(rskfilepath) -82 self._filename = ('_'.join(rskfilepath.split("/")[-1].split("_")[0:3]).split(".rsk")[0]) -83 print("New CTDFjorder Object Created from : " + self._filename) -84 self._ctd_array = np.array(self._rsk.npsamples()) -85 self._ctd_array = pd.DataFrame(self._ctd_array) -86 self.Utility = self.Utility(self._filename) -87 self._cwd = _get_cwd() + 73 def __init__(self, rskfilepath): +74 """ +75 Initialize a new CTD object. +76 +77 Parameters +78 ---------- +79 rskfilepath : str +80 The file path to the RSK file. +81 """ +82 self._rsk = RSK(rskfilepath) +83 self._filename = ('_'.join(rskfilepath.split("/")[-1].split("_")[0:3]).split(".rsk")[0]) +84 print("New CTDFjorder Object Created from : " + self._filename) +85 self._ctd_array = np.array(self._rsk.npsamples()) +86 self._ctd_array = pd.DataFrame(self._ctd_array) +87 self.Utility = self.Utility(self._filename) +88 self._cwd = _get_cwd() @@ -2602,11 +2622,11 @@ Parameters
73 def __init__(self, rskfilepath): +74 """ +75 Initialize a new CTD object. +76 +77 Parameters +78 ---------- +79 rskfilepath : str +80 The file path to the RSK file. +81 """ +82 self._rsk = RSK(rskfilepath) +83 self._filename = ('_'.join(rskfilepath.split("/")[-1].split("_")[0:3]).split(".rsk")[0]) +84 print("New CTDFjorder Object Created from : " + self._filename) +85 self._ctd_array = np.array(self._rsk.npsamples()) +86 self._ctd_array = pd.DataFrame(self._ctd_array) +87 self.Utility = self.Utility(self._filename) +88 self._cwd = _get_cwd()
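A minimal usage sketch of the constructor; the .rsk path below is hypothetical, and any RBR .rsk file path would do:

from CTDFjorder.CTDFjorder import CTD

ctd = CTD("./210913_20210908_1055.rsk")  # hypothetical cast file
ctd.view_table()  # inspect the raw samples before any processing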
89 def view_table(self): -90 """ -91 Print the CTD data table. -92 """ -93 print(tabulate(self._ctd_array, headers='keys', tablefmt='psql')) + 90 def view_table(self): +91 """ +92 Print the CTD data table. +93 """ +94 print(tabulate(self._ctd_array, headers='keys', tablefmt='psql')) @@ -2626,21 +2646,21 @@ Parameters
90 def view_table(self): +91 """ +92 Print the CTD data table. +93 """ +94 print(tabulate(self._ctd_array, headers='keys', tablefmt='psql'))
95 def get_pandas_df(self , copy = True): - 96 """ - 97 Exposes the dataframe of the CTD object for custom processes. - 98 - 99 Parameters -100 ---------- -101 copy : bool, optional -102 If True returns a copy, if False returns the actual DataFrame internal to the CTD object. Defaults to True. -103 -104 Returns -105 ------- -106 DataFrame -107 The pandas df of the CTD object. -108 """ -109 return self._ctd_array.copy() if copy is True else self._ctd_array + 96 def get_pandas_df(self , copy = True): + 97 """ + 98 Exposes the dataframe of the CTD object for custom processes. + 99 +100 Parameters +101 ---------- +102 copy : bool, optional +103 If True returns a copy, if False returns the actual DataFrame internal to the CTD object. Defaults to True. +104 +105 Returns +106 ------- +107 DataFrame +108 The pandas df of the CTD object. +109 """ +110 return self._ctd_array.copy() if copy is True else self._ctd_array @@ -2673,11 +2693,11 @@ Returns
96 def get_pandas_df(self , copy = True): + 97 """ + 98 Exposes the dataframe of the CTD object for custom processes. + 99 +100 Parameters +101 ---------- +102 copy : bool, optional +103 If True returns a copy, if False returns the actual DataFrame internal to the CTD object. Defaults to True. +104 +105 Returns +106 ------- +107 DataFrame +108 The pandas df of the CTD object. +109 """ +110 return self._ctd_array.copy() if copy is True else self._ctd_array
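A short sketch of the copy semantics above, assuming a ctd object built as in the constructor example: the default returns a detached copy, while copy=False hands back the live internal table.

detached = ctd.get_pandas_df()        # edits stay local to this copy
live = ctd.get_pandas_df(copy=False)  # the CTD object's own DataFrame
detached['depth_00'] = 0              # does not affect the CTD object
live['depth_00'] = 0                  # visible to every later CTD method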
111 def add_filename_to_table(self):
-112 """
-113 Add the filename to the CTD data table.
-114 """
-115 self._ctd_array.assign(filename=self._filename)
+ 112 def add_filename_to_table(self):
+113 """
+114 Add the filename to the CTD data table.
+115 """
+116 self._ctd_array = self._ctd_array.assign(filename=self._filename)
@@ -2697,11 +2717,11 @@ Returns
112 def add_filename_to_table(self):
+113 """
+114 Add the filename to the CTD data table.
+115 """
+116 self._ctd_array = self._ctd_array.assign(filename=self._filename)
117 def remove_timezone_indicator(self): -118 """ -119 Removes the timezone indicator in the CTD data table 'timestamp' column. -120 """ -121 self._ctd_array = self.Utility.remove_sample_timezone_indicator(self._ctd_array) + 118 def remove_timezone_indicator(self): +119 """ +120 Removes the timezone indicator in the CTD data table 'timestamp' column. +121 """ +122 self._ctd_array = self.Utility.remove_sample_timezone_indicator(self._ctd_array) @@ -2721,22 +2741,23 @@ Returns
118 def remove_timezone_indicator(self): +119 """ +120 Removes the timezone indicator in the CTD data table 'timestamp' column. +121 """ +122 self._ctd_array = self.Utility.remove_sample_timezone_indicator(self._ctd_array)
123 def add_location_to_table(self): -124 """ -125 Retrieves the sample location data from the RSK file and adds it to the CTD data table. -126 If no location data is found, it attempts to estimate the location using the master sheet. -127 """ -128 location_data = self.Utility.get_sample_location(self._rsk, self._filename) -129 if self.Utility.no_values_in_object(self._ctd_array): -130 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -131 try: -132 self._ctd_array = self._ctd_array.assign(latitude=location_data[0], -133 longitude=location_data[1]) -134 except Exception: -135 self._ctd_array.loc['latitude'] = None -136 self._ctd_array.loc['longitude'] = None -137 self._ctd_array.loc['filename'] = None -138 raise CTDError(self._filename, self._NO_LOCATION_ERROR) + 124 def add_location_to_table(self): +125 """ +126 Retrieves the sample location data from the RSK file and adds it to the CTD data table. +127 If no location data is found, it attempts to estimate the location using the master sheet. +128 """ +129 location_data = self.Utility.get_sample_location(self._rsk, self._filename) +130 if self.Utility.no_values_in_object(self._ctd_array): +131 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +132 try: +133 self._ctd_array = self._ctd_array.assign(latitude=location_data[0], +134 longitude=location_data[1], +135 filename=location_data[2]) +136 except Exception: +137 self._ctd_array.loc['latitude'] = None +138 self._ctd_array.loc['longitude'] = None +139 self._ctd_array.loc['filename'] = None +140 raise CTDError(self._filename, self._NO_LOCATION_ERROR) @@ -2757,13 +2778,13 @@ Returns
124 def add_location_to_table(self): +125 """ +126 Retrieves the sample location data from the RSK file and adds it to the CTD data table. +127 If no location data is found, it attempts to estimate the location using the master sheet. +128 """ +129 location_data = self.Utility.get_sample_location(self._rsk, self._filename) +130 if self.Utility.no_values_in_object(self._ctd_array): +131 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +132 try: +133 self._ctd_array = self._ctd_array.assign(latitude=location_data[0], +134 longitude=location_data[1], +135 filename=location_data[2]) +136 except Exception: +137 self._ctd_array.loc['latitude'] = None +138 self._ctd_array.loc['longitude'] = None +139 self._ctd_array.loc['filename'] = None +140 raise CTDError(self._filename, self._NO_LOCATION_ERROR)
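Since the method raises CTDError when neither the RSK file nor the master sheet yields a position, callers may want to trap it; a sketch assuming the ctd object from the earlier example:

from CTDFjorder.CTDFjorder import CTDError

try:
    ctd.add_location_to_table()
except CTDError as error:
    print(f"{error.filename}: {error.message}")  # cast kept, but without coordinates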
139 def remove_upcasts(self): -140 """ -141 Finds the global maximum depth of the sample, and filters out timestamps that occur before it. -142 """ -143 max_depth_index = self._ctd_array['depth_00'].idxmax() -144 max_depth_timestamp = self._ctd_array.loc[max_depth_index, 'timestamp'] -145 self._ctd_array = self._ctd_array[self._ctd_array['timestamp'] >=max_depth_timestamp] + 141 def remove_upcasts(self): +142 """ +143 Finds the global maximum depth of the sample, and filters out timestamps that occur before it. +144 """ +145 max_depth_index = self._ctd_array['depth_00'].idxmax() +146 max_depth_timestamp = self._ctd_array.loc[max_depth_index, 'timestamp'] +147 self._ctd_array = self._ctd_array[self._ctd_array['timestamp'] >=max_depth_timestamp] @@ -2783,27 +2804,27 @@ Returns
141 def remove_upcasts(self): +142 """ +143 Finds the global maximum depth of the sample, and filters out timestamps that occur before it. +144 """ +145 max_depth_index = self._ctd_array['depth_00'].idxmax() +146 max_depth_timestamp = self._ctd_array.loc[max_depth_index, 'timestamp'] +147 self._ctd_array = self._ctd_array[self._ctd_array['timestamp'] >=max_depth_timestamp]
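The filtering rule in isolation, on a synthetic profile (illustrative values only): samples timestamped before the deepest reading are discarded.

import pandas as pd

profile = pd.DataFrame({
    'timestamp': pd.date_range('2021-09-08', periods=5, freq='s'),
    'depth_00': [1.0, 5.0, 10.0, 6.0, 2.0],  # descent, then ascent
})
deepest_time = profile.loc[profile['depth_00'].idxmax(), 'timestamp']
profile = profile[profile['timestamp'] >= deepest_time]
# rows at 10.0 m, 6.0 m and 2.0 m remain; the descent is dropped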
147 def remove_non_positive_samples(self): -148 """ -149 Iterates through the columns of the CTD data table and removes rows with non-positive values -150 for depth, pressure, salinity, absolute salinity, or density. -151 """ -152 if self.Utility.no_values_in_object(self._ctd_array): -153 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -154 for column in self._ctd_array.columns: -155 match column: -156 case 'depth_00': -157 self._ctd_array = self.Utility.remove_rows_with_negative_depth(self._ctd_array) -158 case 'pressure_00': -159 self._ctd_array = self.Utility.remove_rows_with_negative_pressure(self._ctd_array) -160 case 'salinity_00': -161 self._ctd_array = self.Utility.remove_rows_with_negative_salinity(self._ctd_array) -162 case 'salinityabs': -163 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) -164 case 'density': -165 self._ctd_array = self.Utility.remove_rows_with_negative_density(self._ctd_array) -166 if self.Utility.no_values_in_object(self._ctd_array): -167 raise CTDError(self._filename, self._REMOVE_NEGATIVES_ERROR) + 149 def remove_non_positive_samples(self): +150 """ +151 Iterates through the columns of the CTD data table and removes rows with non-positive values +152 for depth, pressure, salinity, absolute salinity, or density. +153 """ +154 if self.Utility.no_values_in_object(self._ctd_array): +155 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +156 for column in self._ctd_array.columns: +157 match column: +158 case 'depth_00': +159 self._ctd_array = self.Utility.remove_rows_with_negative_depth(self._ctd_array) +160 case 'pressure_00': +161 self._ctd_array = self.Utility.remove_rows_with_negative_pressure(self._ctd_array) +162 case 'salinity_00': +163 self._ctd_array = self.Utility.remove_rows_with_negative_salinity(self._ctd_array) +164 case 'salinityabs': +165 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) +166 case 'density': +167 self._ctd_array = self.Utility.remove_rows_with_negative_density(self._ctd_array) +168 if self.Utility.no_values_in_object(self._ctd_array): +169 raise CTDError(self._filename, self._REMOVE_NEGATIVES_ERROR) @@ -2824,36 +2845,36 @@ Returns
149 def remove_non_positive_samples(self): +150 """ +151 Iterates through the columns of the CTD data table and removes rows with non-positive values +152 for depth, pressure, salinity, absolute salinity, or density. +153 """ +154 if self.Utility.no_values_in_object(self._ctd_array): +155 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +156 for column in self._ctd_array.columns: +157 match column: +158 case 'depth_00': +159 self._ctd_array = self.Utility.remove_rows_with_negative_depth(self._ctd_array) +160 case 'pressure_00': +161 self._ctd_array = self.Utility.remove_rows_with_negative_pressure(self._ctd_array) +162 case 'salinity_00': +163 self._ctd_array = self.Utility.remove_rows_with_negative_salinity(self._ctd_array) +164 case 'salinityabs': +165 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) +166 case 'density': +167 self._ctd_array = self.Utility.remove_rows_with_negative_density(self._ctd_array) +168 if self.Utility.no_values_in_object(self._ctd_array): +169 raise CTDError(self._filename, self._REMOVE_NEGATIVES_ERROR)
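The per-column guard in miniature (synthetic values): each relevant column filters rows independently, so a row survives only if it is positive in every checked column.

import pandas as pd

df = pd.DataFrame({'depth_00': [-0.2, 1.5, 3.0],
                   'salinity_00': [33.1, -1.0, 33.4]})
for column in ('depth_00', 'salinity_00'):
    df = df[df[column] > 0]
# only the (3.0 m, 33.4 PSU) row passes both filters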
169 def clean(self, feature, method='salinitydiff'):
-170 """
-171 Applies complex data cleaning methods to the specified feature based on the selected method.
-172 Currently supports cleaning practical salinity using the 'salinitydiff' method.
-173 
-174 Parameters
-175 ----------
-176 feature : str
-177 The feature to clean (e.g., 'practicalsalinity').
-178 method : str, optional
-179 The cleaning method to apply, defaults to 'salinitydiff'.
-180 Options are 'salinitydiff'.
-181 """
-182 if self.Utility.no_values_in_object(self._ctd_array):
-183 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
-184 supported_features = {
-185 "practicalsalinity": "salinity_00"
-186 }
-187 supported_methods = {
-188 "salinitydiff": Calculate.calculate_and_drop_salinity_spikes(self._ctd_array),
-189 }
-190 if feature in supported_features.keys():
-191 if method in supported_methods.keys():
-192 self._ctd_array.loc[self._ctd_array.index, 'salinity_00'] = supported_methods[method]
-193 else:
-194 print(f"clean: Invalid method \"{method}\" not in {supported_methods.keys()}")
-195 else:
-196 print(f"clean: Invalid feature \"{feature}\" not in {supported_features.keys()}.")
-197 if self.Utility.no_values_in_object(self._ctd_array):
-198 raise CTDError(self._filename, self._DATA_CLEANING_ERROR)
+ 171 def clean(self, feature, method='salinitydiff'):
+172 """
+173 Applies complex data cleaning methods to the specified feature based on the selected method.
+174 Currently supports cleaning practical salinity using the 'salinitydiff' method.
+175 
+176 Parameters
+177 ----------
+178 feature : str
+179 The feature to clean (e.g., 'practicalsalinity').
+180 method : str, optional
+181 The cleaning method to apply, defaults to 'salinitydiff'.
+182 Options are 'salinitydiff'.
+183 """
+184 if self.Utility.no_values_in_object(self._ctd_array):
+185 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+186 supported_features = {
+187 "practicalsalinity": "salinity_00"
+188 }
+189 supported_methods = {
+190 "salinitydiff": Calculate.calculate_and_drop_salinity_spikes,
+191 }
+192 if feature in supported_features.keys():
+193 if method in supported_methods.keys():
+194 self._ctd_array = supported_methods[method](self._ctd_array)
+195 else:
+196 print(f"clean: Invalid method \"{method}\" not in {supported_methods.keys()}")
+197 else:
+198 print(f"clean: Invalid feature \"{feature}\" not in {supported_features.keys()}.")
+199 if self.Utility.no_values_in_object(self._ctd_array):
+200 raise CTDError(self._filename, self._DATA_CLEANING_ERROR)
@@ -2884,18 +2905,18 @@ Parameters
171 def clean(self, feature, method='salinitydiff'):
+172 """
+173 Applies complex data cleaning methods to the specified feature based on the selected method.
+174 Currently supports cleaning practical salinity using the 'salinitydiff' method.
+175 
+176 Parameters
+177 ----------
+178 feature : str
+179 The feature to clean (e.g., 'practicalsalinity').
+180 method : str, optional
+181 The cleaning method to apply, defaults to 'salinitydiff'.
+182 Options are 'salinitydiff'.
+183 """
+184 if self.Utility.no_values_in_object(self._ctd_array):
+185 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+186 supported_features = {
+187 "practicalsalinity": "salinity_00"
+188 }
+189 supported_methods = {
+190 "salinitydiff": Calculate.calculate_and_drop_salinity_spikes,
+191 }
+192 if feature in supported_features.keys():
+193 if method in supported_methods.keys():
+194 self._ctd_array = supported_methods[method](self._ctd_array)
+195 else:
+196 print(f"clean: Invalid method \"{method}\" not in {supported_methods.keys()}")
+197 else:
+198 print(f"clean: Invalid feature \"{feature}\" not in {supported_features.keys()}.")
+199 if self.Utility.no_values_in_object(self._ctd_array):
+200 raise CTDError(self._filename, self._DATA_CLEANING_ERROR)
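Typical calls on a populated ctd object: a supported feature and method pair despikes the salinity data, while unrecognized names only print a notice and leave the table untouched.

ctd.clean('practicalsalinity', 'salinitydiff')  # despike practical salinity
ctd.clean('temperature')                        # prints the invalid-feature notice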
200 def add_absolute_salinity(self): -201 """ -202 Calculates the absolute salinity using the TEOS-10 equations and adds it as a new column -203 to the CTD data table. Removes rows with negative absolute salinity values. -204 """ -205 if self.Utility.no_values_in_object(self._ctd_array): -206 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -207 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = Calculate.calculate_absolute_salinity( -208 self._ctd_array) -209 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) -210 if self.Utility.no_values_in_object(self._ctd_array): -211 raise CTDError(self._filename, self._SALINITYABS_CALCULATION_ERROR) + 202 def add_absolute_salinity(self): +203 """ +204 Calculates the absolute salinity using the TEOS-10 equations and adds it as a new column +205 to the CTD data table. Removes rows with negative absolute salinity values. +206 """ +207 if self.Utility.no_values_in_object(self._ctd_array): +208 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +209 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = Calculate.calculate_absolute_salinity( +210 self._ctd_array) +211 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) +212 if self.Utility.no_values_in_object(self._ctd_array): +213 raise CTDError(self._filename, self._SALINITYABS_CALCULATION_ERROR) @@ -2916,24 +2937,24 @@ Parameters
202 def add_absolute_salinity(self): +203 """ +204 Calculates the absolute salinity using the TEOS-10 equations and adds it as a new column +205 to the CTD data table. Removes rows with negative absolute salinity values. +206 """ +207 if self.Utility.no_values_in_object(self._ctd_array): +208 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +209 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = Calculate.calculate_absolute_salinity( +210 self._ctd_array) +211 self._ctd_array = self.Utility.remove_rows_with_negative_salinityabs(self._ctd_array) +212 if self.Utility.no_values_in_object(self._ctd_array): +213 raise CTDError(self._filename, self._SALINITYABS_CALCULATION_ERROR)
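The underlying TEOS-10 conversion can be checked standalone with gsw; the scalar inputs here are illustrative (practical salinity, sea pressure in dbar, then longitude and latitude in decimal degrees):

import gsw

SA = gsw.SA_from_SP(33.5, 10.0, -64.0, -64.8)
print(float(SA))  # absolute salinity in g/kg, slightly above the input SP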
213 def add_density(self):
-214 """
-215 Calculates the density using the TEOS-10 equations and adds it as a new column to the CTD
-216 data table. If absolute salinity is not present, it is calculated first.
-217 """
-218 if self.Utility.no_values_in_object(self._ctd_array):
-219 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
-220 if 'salinityabs' in self._ctd_array.columns:
-221 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density(
-222 self._ctd_array)
-223 else:
-224 self._ctd_array.loc[self._ctd_array.index, 'salinityabs'] = self.add_absolute_salinity()
-225 self._ctd_array = Calculate.calculate_absolute_density(self._ctd_array)
-226 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density(
-227 self._ctd_array)
-228 self._ctd_array.drop('salinityabs')
-229 if self.Utility.no_values_in_object(self._ctd_array):
-230 raise CTDError(self._filename, self._DENSITY_CALCULATION_ERROR)
+ 215 def add_density(self):
+216 """
+217 Calculates the density using the TEOS-10 equations and adds it as a new column to the CTD
+218 data table. If absolute salinity is not present, it is calculated first.
+219 """
+220 if self.Utility.no_values_in_object(self._ctd_array):
+221 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+222 if 'salinityabs' in self._ctd_array.columns:
+223 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density(
+224 self._ctd_array)
+225 else:
+226 self.add_absolute_salinity()  # adds the 'salinityabs' column in place
+227 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density(
+228 self._ctd_array)
+229 # Drop the helper column now that density has been derived
+230 self._ctd_array = self._ctd_array.drop('salinityabs', axis=1)
+231 if self.Utility.no_values_in_object(self._ctd_array):
+232 raise CTDError(self._filename, self._DENSITY_CALCULATION_ERROR)
@@ -2954,14 +2975,14 @@ Parameters
215 def add_density(self):
+216 """
+217 Calculates the density using the TEOS-10 equations and adds it as a new column to the CTD
+218 data table. If absolute salinity is not present, it is calculated first.
+219 """
+220 if self.Utility.no_values_in_object(self._ctd_array):
+221 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+222 if 'salinityabs' in self._ctd_array.columns:
+223 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density(
+224 self._ctd_array)
+225 else:
+226 self.add_absolute_salinity()  # adds the 'salinityabs' column in place
+227 self._ctd_array.loc[self._ctd_array.index, 'density'] = Calculate.calculate_absolute_density(
+228 self._ctd_array)
+229 # Drop the helper column now that density has been derived
+230 self._ctd_array = self._ctd_array.drop('salinityabs', axis=1)
+231 if self.Utility.no_values_in_object(self._ctd_array):
+232 raise CTDError(self._filename, self._DENSITY_CALCULATION_ERROR)
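The density computation in isolation, again via gsw with illustrative values; rho_t_exact expects absolute salinity, in-situ temperature, and sea pressure:

import gsw

rho = gsw.density.rho_t_exact(33.66, 1.5, 10.0)  # SA (g/kg), t (°C), p (dbar)
print(float(rho))  # in-situ density in kg/m^3, on the order of 1027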
232 def add_overturns(self): -233 """ -234 Calculates density changes between consecutive measurements and identifies overturns where -235 denser water lies above less dense water. Adds an 'overturn' column to the CTD data table. -236 """ -237 if self.Utility.no_values_in_object(self._ctd_array): -238 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) -239 self._ctd_array = Calculate.calculate_overturns(self._ctd_array.copy()) + 234 def add_overturns(self): +235 """ +236 Calculates density changes between consecutive measurements and identifies overturns where +237 denser water lies above less dense water. Adds an 'overturn' column to the CTD data table. +238 """ +239 if self.Utility.no_values_in_object(self._ctd_array): +240 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +241 self._ctd_array = Calculate.calculate_overturns(self._ctd_array.copy()) @@ -2982,22 +3003,22 @@ Parameters
234 def add_overturns(self): +235 """ +236 Calculates density changes between consecutive measurements and identifies overturns where +237 denser water lies above less dense water. Adds an 'overturn' column to the CTD data table. +238 """ +239 if self.Utility.no_values_in_object(self._ctd_array): +240 raise CTDError(self._filename, self._NO_SAMPLES_ERROR) +241 self._ctd_array = Calculate.calculate_overturns(self._ctd_array.copy())
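The overturn flag in miniature on a depth-sorted toy profile: the 0.10 kg/m³ density drop between 1 m and 2 m exceeds the 0.05 kg/m³ threshold, so the 2 m row is flagged.

import pandas as pd

profile = pd.DataFrame({'depth_00': [1, 2, 3],
                        'density': [1026.00, 1025.90, 1026.10]})
profile['overturn'] = profile['density'].diff() < -0.05
print(profile['overturn'].tolist())  # [False, True, False]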
241 def add_mean_surface_density(self, start = 0.0, end = 100.0):
-242 """
-243 Calculates the mean surface density from the density values and adds it as a new column
-244 to the CTD data table.
-245 
-246 Parameters
-247 ----------
-248 start : float, optional
-249 Depth bound, defaults to 0.
-250 end : float, optional
-251 Depth bound, default to 1.
-252 """
-253 if self.Utility.no_values_in_object(self._ctd_array):
-254 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
-255 mean_surface_density = Calculate.calculate_mean_surface_density(self._ctd_array.copy(), (start, end))
-256 self._ctd_array = self._ctd_array.assign(mean_surface_density=mean_surface_density)
+ 243 def add_mean_surface_density(self, start = 0.0, end = 100.0):
+244 """
+245 Calculates the mean surface density from the density values and adds it as a new column
+246 to the CTD data table.
+247 
+248 Parameters
+249 ----------
+250 start : float, optional
+251 Depth bound, defaults to 0.
+252 end : float, optional
+253 Depth bound, defaults to 100.
+254 """
+255 if self.Utility.no_values_in_object(self._ctd_array):
+256 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+257 mean_surface_density = Calculate.calculate_mean_surface_density(self._ctd_array.copy(), (start, end))
+258 self._ctd_array = self._ctd_array.assign(mean_surface_density=mean_surface_density)
@@ -3027,43 +3048,43 @@ Parameters
243 def add_mean_surface_density(self, start = 0.0, end = 100.0):
+244 """
+245 Calculates the mean surface density from the density values and adds it as a new column
+246 to the CTD data table.
+247 
+248 Parameters
+249 ----------
+250 start : float, optional
+251 Depth bound, defaults to 0.
+252 end : float, optional
+253 Depth bound, defaults to 100.
+254 """
+255 if self.Utility.no_values_in_object(self._ctd_array):
+256 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+257 mean_surface_density = Calculate.calculate_mean_surface_density(self._ctd_array.copy(), (start, end))
+258 self._ctd_array = self._ctd_array.assign(mean_surface_density=mean_surface_density)
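A usage sketch, assuming density has already been added to the table: the (start, end) pair is forwarded to Calculate.calculate_mean_surface_density as index bounds, and the resulting scalar is broadcast into a mean_surface_density column.

ctd.add_mean_surface_density(start=0.0, end=10.0)
print(ctd.get_pandas_df()['mean_surface_density'].iloc[0])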
258 def add_mld(self, reference, method="default"):
-259 """
-260 Calculates the mixed layer depth using the specified method and reference depth.
-261 Adds the MLD and the actual reference depth used as new columns to the CTD data table.
-262 
-263 Parameters
-264 ----------
-265 reference : int
-266 The reference depth for MLD calculation.
-267 method : int
-268 The MLD calculation method (default: "default").
-269 """
-270 if self.Utility.no_values_in_object(self._ctd_array):
-271 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
-272 copy_ctd_array = self._ctd_array.copy()
-273 supported_methods = [
-274 "default"
-275 ]
-276 unpack = None
-277 
-278 if method == "default":
-279 unpack = Calculate.calculate_mld(copy_ctd_array['density'], copy_ctd_array['depth_00'],
-280 reference)
-281 else:
-282 print(f"add_mld: Invalid method \"{method}\" not in {supported_methods}")
-283 unpack = [None, None]
-284 if unpack is None:
+ 260 def add_mld(self, reference, method="default"):
+261 """
+262 Calculates the mixed layer depth using the specified method and reference depth.
+263 Adds the MLD and the actual reference depth used as new columns to the CTD data table.
+264 
+265 Parameters
+266 ----------
+267 reference : int
+268 The reference depth for MLD calculation.
+269 method : str
+270 The MLD calculation method (default: "default").
+271 """
+272 if self.Utility.no_values_in_object(self._ctd_array):
+273 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+274 copy_ctd_array = self._ctd_array.copy()
+275 supported_methods = [
+276 "default"
+277 ]
+278 unpack = None
+279 
+280 if method == "default":
+281 unpack = Calculate.calculate_mld(copy_ctd_array['density'], copy_ctd_array['depth_00'],
+282 reference)
+283 else:
+284 print(f"add_mld: Invalid method \"{method}\" not in {supported_methods}")
285 unpack = [None, None]
+286 if unpack is None:
+287 unpack = [None, None]
+288 raise CTDError(self._filename, "MLD could not be calculated.")
+289 MLD = unpack[0]
+290 depth_used_as_reference = unpack[1]
+291 self._ctd_array.loc[self._ctd_array.index, f'MLD {reference}'] = MLD
+292 self._ctd_array.loc[
+293 self._ctd_array.index, f'MLD {reference} Actual Reference Depth'] = depth_used_as_reference
+294 self._ctd_array = copy_ctd_array.merge(self._ctd_array)
+295 if self.Utility.no_values_in_object(self._ctd_array):
+296 raise CTDError(self._filename, self._MLD_ERROR)
@@ -3093,55 +3114,55 @@ Parameters
260 def add_mld(self, reference, method="default"):
+261 """
+262 Calculates the mixed layer depth using the specified method and reference depth.
+263 Adds the MLD and the actual reference depth used as new columns to the CTD data table.
+264 
+265 Parameters
+266 ----------
+267 reference : int
+268 The reference depth for MLD calculation.
+269 method : str
+270 The MLD calculation method (default: "default").
+271 """
+272 if self.Utility.no_values_in_object(self._ctd_array):
+273 raise CTDError(self._filename, self._NO_SAMPLES_ERROR)
+274 copy_ctd_array = self._ctd_array.copy()
+275 supported_methods = [
+276 "default"
+277 ]
+278 unpack = None
+279 
+280 if method == "default":
+281 unpack = Calculate.calculate_mld(copy_ctd_array['density'], copy_ctd_array['depth_00'],
+282 reference)
+283 else:
+284 print(f"add_mld: Invalid method \"{method}\" not in {supported_methods}")
285 unpack = [None, None]
-286 raise CTDError("MLD could not be calculated.")
-287 MLD = unpack[0]
-288 depth_used_as_reference = unpack[1]
-289 self._ctd_array.loc[self._ctd_array.index, f'MLD {reference}'] = MLD
-290 self._ctd_array.loc[
-291 self._ctd_array.index, f'MLD {reference} Actual Reference Depth'] = depth_used_as_reference
-292 self._ctd_array = copy_ctd_array.merge(self._ctd_array)
-293 if self.Utility.no_values_in_object(self._ctd_array):
-294 raise CTDError(self._filename, self._MLD_ERROR)
+286 if unpack is None:
+287 unpack = [None, None]
+288 raise CTDError(self._filename, "MLD could not be calculated.")
+289 MLD = unpack[0]
+290 depth_used_as_reference = unpack[1]
+291 self._ctd_array.loc[self._ctd_array.index, f'MLD {reference}'] = MLD
+292 self._ctd_array.loc[
+293 self._ctd_array.index, f'MLD {reference} Actual Reference Depth'] = depth_used_as_reference
+294 self._ctd_array = copy_ctd_array.merge(self._ctd_array)
+295 if self.Utility.no_values_in_object(self._ctd_array):
+296 raise CTDError(self._filename, self._MLD_ERROR)
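The default threshold method can be exercised directly on a synthetic two-layer profile (illustrative values): the reference density is anchored at 2 m, and the first deeper sample exceeding it by more than 0.03 kg/m³ marks the MLD.

import pandas as pd
from CTDFjorder.CTDFjorder import Calculate

depths = pd.Series([1.0, 2.0, 5.0, 10.0, 20.0, 30.0])
densities = pd.Series([1025.00, 1025.01, 1025.02, 1025.02, 1025.20, 1025.40])
mld, reference = Calculate.calculate_mld(densities, depths, reference_depth=2)
print(mld, reference)  # 20.0 2.0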
296 def save_to_csv(self, output_file): -297 """ -298 Renames the columns of the CTD data table based on a predefined mapping and saves the -299 data to the specified CSV file. If the file already exists, the data is appended to it. -300 -301 Parameters -302 ---------- -303 output_file : str -304 The output CSV file path. -305 """ -306 rsk_labels = { -307 "temperature_00": "Temperature (°C)", -308 "pressure_00": "Pressure (dbar)", -309 "chlorophyll_00": "Chlorophyll a (µg/l)", -310 "seapressure_00": "Sea Pressure (dbar)", -311 "depth_00": "Depth (m)", -312 "salinity_00": "Salinity (PSU)", -313 "speedofsound_00": "Speed of Sound (m/s)", -314 "specificconductivity_00": "Specific Conductivity (µS/cm)", -315 "conductivity_00": "Conductivity (mS/cm)", -316 "density": "Density (kg/m^3) Derived", -317 "salinityabs": "Absolute Salinity (g/kg) Derived", -318 "MLD_Zero": "MLD Zero (m) Derived", -319 "MLD_Ten": "MLD Ten (m) Derived", -320 "stratification": "Stratification (J/m^2) Derived", -321 "mean_surface_density": "Mean Surface Density (kg/m^3) Derived", -322 "overturn": "Overturn (Δρ < -0.05)" -323 } -324 # Renaming columns -325 data = self._ctd_array.copy() -326 if 'filename' in data.columns: -327 data = data[[col for col in data.columns if col != 'filename'] + ['filename']] -328 for key, new_column_name in rsk_labels.items(): -329 if key in data.columns: -330 data = data.rename(columns={key: new_column_name}) -331 data.reset_index(inplace=True, drop=True) -332 try: -333 csv_df = pd.read_csv(str(output_file)) -334 except FileNotFoundError: -335 print(f"Error: The file {output_file} does not exist. A new file will be created.") -336 csv_df = pd.DataFrame() # If file does not exist, create an empty DataFrame -337 -338 # Merge the existing DataFrame with the new DataFrame -339 merged_df = pd.concat([csv_df, data], ignore_index=True) -340 -341 # Overwrite the original CSV file with the merged DataFrame -342 merged_df.to_csv(output_file, index=False) -343 -344 return merged_df + 298 def save_to_csv(self, output_file): +299 """ +300 Renames the columns of the CTD data table based on a predefined mapping and saves the +301 data to the specified CSV file. If the file already exists, the data is appended to it. +302 +303 Parameters +304 ---------- +305 output_file : str +306 The output CSV file path. 
+307 """ +308 rsk_labels = { +309 "temperature_00": "Temperature (°C)", +310 "pressure_00": "Pressure (dbar)", +311 "chlorophyll_00": "Chlorophyll a (µg/l)", +312 "seapressure_00": "Sea Pressure (dbar)", +313 "depth_00": "Depth (m)", +314 "salinity_00": "Salinity (PSU)", +315 "speedofsound_00": "Speed of Sound (m/s)", +316 "specificconductivity_00": "Specific Conductivity (µS/cm)", +317 "conductivity_00": "Conductivity (mS/cm)", +318 "density": "Density (kg/m^3) Derived", +319 "salinityabs": "Absolute Salinity (g/kg) Derived", +320 "MLD_Zero": "MLD Zero (m) Derived", +321 "MLD_Ten": "MLD Ten (m) Derived", +322 "stratification": "Stratification (J/m^2) Derived", +323 "mean_surface_density": "Mean Surface Density (kg/m^3) Derived", +324 "overturn": "Overturn (Δρ < -0.05)" +325 } +326 # Renaming columns +327 data = self._ctd_array.copy() +328 if 'filename' in data.columns: +329 data = data[[col for col in data.columns if col != 'filename'] + ['filename']] +330 for key, new_column_name in rsk_labels.items(): +331 if key in data.columns: +332 data = data.rename(columns={key: new_column_name}) +333 data.reset_index(inplace=True, drop=True) +334 try: +335 csv_df = pd.read_csv(str(output_file)) +336 except FileNotFoundError: +337 print(f"Error: The file {output_file} does not exist. A new file will be created.") +338 csv_df = pd.DataFrame() # If file does not exist, create an empty DataFrame +339 +340 # Merge the existing DataFrame with the new DataFrame +341 merged_df = pd.concat([csv_df, data], ignore_index=True) +342 +343 # Overwrite the original CSV file with the merged DataFrame +344 merged_df.to_csv(output_file, index=False) +345 +346 return merged_df @@ -3169,76 +3190,76 @@ Parameters
298 def save_to_csv(self, output_file): +299 """ +300 Renames the columns of the CTD data table based on a predefined mapping and saves the +301 data to the specified CSV file. If the file already exists, the data is appended to it. +302 +303 Parameters +304 ---------- +305 output_file : str +306 The output CSV file path. +307 """ +308 rsk_labels = { +309 "temperature_00": "Temperature (°C)", +310 "pressure_00": "Pressure (dbar)", +311 "chlorophyll_00": "Chlorophyll a (µg/l)", +312 "seapressure_00": "Sea Pressure (dbar)", +313 "depth_00": "Depth (m)", +314 "salinity_00": "Salinity (PSU)", +315 "speedofsound_00": "Speed of Sound (m/s)", +316 "specificconductivity_00": "Specific Conductivity (µS/cm)", +317 "conductivity_00": "Conductivity (mS/cm)", +318 "density": "Density (kg/m^3) Derived", +319 "salinityabs": "Absolute Salinity (g/kg) Derived", +320 "MLD_Zero": "MLD Zero (m) Derived", +321 "MLD_Ten": "MLD Ten (m) Derived", +322 "stratification": "Stratification (J/m^2) Derived", +323 "mean_surface_density": "Mean Surface Density (kg/m^3) Derived", +324 "overturn": "Overturn (Δρ < -0.05)" +325 } +326 # Renaming columns +327 data = self._ctd_array.copy() +328 if 'filename' in data.columns: +329 data = data[[col for col in data.columns if col != 'filename'] + ['filename']] +330 for key, new_column_name in rsk_labels.items(): +331 if key in data.columns: +332 data = data.rename(columns={key: new_column_name}) +333 data.reset_index(inplace=True, drop=True) +334 try: +335 csv_df = pd.read_csv(str(output_file)) +336 except FileNotFoundError: +337 print(f"Error: The file {output_file} does not exist. A new file will be created.") +338 csv_df = pd.DataFrame() # If file does not exist, create an empty DataFrame +339 +340 # Merge the existing DataFrame with the new DataFrame +341 merged_df = pd.concat([csv_df, data], ignore_index=True) +342 +343 # Overwrite the original CSV file with the merged DataFrame +344 merged_df.to_csv(output_file, index=False) +345 +346 return merged_df
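Note that save_to_csv concatenates onto any existing file rather than overwriting it row-for-row, so repeated runs accumulate casts. A sketch of a batch export built on that behavior (the .rsk paths are hypothetical):

```python
# Hypothetical batch export; pd.concat aligns on column names, so casts
# with differing channels simply gain NaN-filled columns in the output.
for path in ("cast_20230101_1200.rsk", "cast_20230102_0900.rsk"):
    ctd = CTD(path)
    ctd.save_to_csv("all_casts.csv")  # appends when all_casts.csv already exists
```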
346 def plot_depth_salinity_density_mld_line(self): -347 """ -348 Generates a plot of depth vs. salinity and density, applying LOESS smoothing to the data. -349 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. -350 Saves the plot as an image file. -351 """ -352 df = self._ctd_array.copy() -353 filename = self._filename -354 plt.rcParams.update({'font.size': 16}) -355 df_filtered = df -356 if df_filtered.isnull().values.any(): -357 df_filtered.dropna(inplace=True) # Drop rows with NaNs -358 df_filtered = df_filtered.reset_index(drop=True) -359 if len(df_filtered) < 1: -360 return -361 fig, ax1 = plt.subplots(figsize=(18, 18)) -362 ax1.invert_yaxis() -363 # Dynamically set y-axis limits based on depth data -364 max_depth = df_filtered['depth_00'].max() -365 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards -366 lowess = statsmodels.api.nonparametric.lowess -367 salinity_lowess = lowess(df_filtered['salinity_00'], df_filtered['depth_00'], frac=0.1) -368 salinity_depths, salinity_smooth = zip(*salinity_lowess) -369 color_salinity = 'tab:blue' -370 ax1.plot(salinity_smooth, salinity_depths, color=color_salinity, label='Practical Salinity') -371 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) -372 ax1.set_ylabel('Depth (m)') -373 ax1.tick_params(axis='x', labelcolor=color_salinity) -374 density_lowess = lowess(df_filtered['density'], df_filtered['depth_00'], frac=0.1) -375 density_depths, density_smooth = zip(*density_lowess) -376 ax2 = ax1.twiny() -377 color_density = 'tab:red' -378 ax2.plot(density_smooth, density_depths, color=color_density, label='Density (kg/m^3)') -379 ax2.set_xlabel('Density (kg/m^3)', color=color_density) -380 ax2.tick_params(axis='x', labelcolor=color_density) -381 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) -382 mld_cols = [] -383 for col in df.columns: -384 if 'MLD' in col and 'Actual' not in col: -385 mld_cols.append(df[col]) -386 refdepth_cols = [] -387 for col in df.columns: -388 if 'Actual' in col: -389 refdepth_cols.append(df[col]) -390 for idx, mld_col in enumerate(mld_cols): -391 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', -392 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') -393 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', -394 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) -395 if df_filtered['overturn'].any(): -396 plt.title( -397 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform " -398 f"\n THIS IS AN UNSTABLE WATER COLUMN " -399 f"\n(Higher density fluid lies above lower density fluid)") -400 else: -401 plt.title( -402 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform " -403 f"\n THIS IS AN UNSTABLE WATER COLUMN " -404 f"\n(Higher density fluid lies above lower density fluid)") -405 ax1.grid(True) -406 lines, labels = ax1.get_legend_handles_labels() -407 ax2_legend = ax2.get_legend_handles_labels() -408 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='lower center', bbox_to_anchor=(0.5, -0.15), -409 ncol=3) -410 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes_line.png") -411 plot_folder = os.path.join(self._cwd, "plots") -412 if not (os.path.isdir(plot_folder)): -413 os.mkdir(plot_folder) -414 plt.savefig(plot_path) -415 plt.close(fig) + 348 def plot_depth_salinity_density_mld_line(self): +349 """ +350 Generates a plot of depth vs. 
salinity and density, applying LOESS smoothing to the data.
+351 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths.
+352 Saves the plot as an image file.
+353 """
+354 df = self._ctd_array.copy()
+355 filename = self._filename
+356 plt.rcParams.update({'font.size': 16})
+357 df_filtered = df
+358 if df_filtered.isnull().values.any():
+359 df_filtered.dropna(inplace=True) # Drop rows with NaNs
+360 df_filtered = df_filtered.reset_index(drop=True)
+361 if len(df_filtered) < 1:
+362 return
+363 fig, ax1 = plt.subplots(figsize=(18, 18))
+364 ax1.invert_yaxis()
+365 # Dynamically set y-axis limits based on depth data
+366 max_depth = df_filtered['depth_00'].max()
+367 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards
+368 lowess = statsmodels.api.nonparametric.lowess
+369 salinity_lowess = lowess(df_filtered['salinity_00'], df_filtered['depth_00'], frac=0.1)
+370 salinity_depths, salinity_smooth = zip(*salinity_lowess)
+371 color_salinity = 'tab:blue'
+372 ax1.plot(salinity_smooth, salinity_depths, color=color_salinity, label='Practical Salinity')
+373 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity)
+374 ax1.set_ylabel('Depth (m)')
+375 ax1.tick_params(axis='x', labelcolor=color_salinity)
+376 density_lowess = lowess(df_filtered['density'], df_filtered['depth_00'], frac=0.1)
+377 density_depths, density_smooth = zip(*density_lowess)
+378 ax2 = ax1.twiny()
+379 color_density = 'tab:red'
+380 ax2.plot(density_smooth, density_depths, color=color_density, label='Density (kg/m^3)')
+381 ax2.set_xlabel('Density (kg/m^3)', color=color_density)
+382 ax2.tick_params(axis='x', labelcolor=color_density)
+383 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False))
+384 mld_cols = []
+385 for col in df.columns:
+386 if 'MLD' in col and 'Actual' not in col:
+387 mld_cols.append(df[col])
+388 refdepth_cols = []
+389 for col in df.columns:
+390 if 'Actual' in col:
+391 refdepth_cols.append(df[col])
+392 for idx, mld_col in enumerate(mld_cols):
+393 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--',
+394 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref')
+395 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center',
+396 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform())
+397 if df_filtered['overturn'].any():
+398 plt.title(
+399 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform "
+400 f"\n THIS IS AN UNSTABLE WATER COLUMN "
+401 f"\n(Higher density fluid lies above lower density fluid)")
+402 else:
+403 plt.title(
+404 f"{filename}\n Depth vs. Salinity and Density with LOESS Transform "
+405 f"\n THIS IS A STABLE WATER COLUMN "
+406 f"\n(No higher density fluid lies above lower density fluid)")
+407 ax1.grid(True)
+408 lines, labels = ax1.get_legend_handles_labels()
+409 ax2_legend = ax2.get_legend_handles_labels()
+410 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='lower center', bbox_to_anchor=(0.5, -0.15),
+411 ncol=3)
+412 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes_line.png")
+413 plot_folder = os.path.join(self._cwd, "plots")
+414 if not (os.path.isdir(plot_folder)):
+415 os.mkdir(plot_folder)
+416 plt.savefig(plot_path)
+417 plt.close(fig)
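The LOESS pass above fits each point against its nearest neighbours in depth; frac=0.1 means every fitted value is a local regression over the nearest 10% of samples. A self-contained sketch of the same statsmodels call on synthetic data:

```python
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
depth = np.linspace(0, 100, 400)
salinity = 34 + 0.5 * np.tanh((depth - 30) / 5) + rng.normal(0, 0.05, depth.size)

# lowess(endog, exog, frac) returns an (n, 2) array of (exog, fitted) pairs
# sorted by exog -- the same shape the plotting code unpacks with zip(*...)
smoothed = sm.nonparametric.lowess(salinity, depth, frac=0.1)
depths, salinity_smooth = smoothed[:, 0], smoothed[:, 1]
```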
417 def plot_depth_density_salinity_mld_scatter(self): -418 """ -419 Generates a scatter plot of depth vs. salinity and density. -420 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. -421 Saves the plot as an image file. -422 """ -423 df = self._ctd_array.copy() -424 filename = self._filename -425 plt.rcParams.update({'font.size': 16}) -426 df_filtered = df -427 if df_filtered.empty: -428 plt.close() -429 return -430 df_filtered = df_filtered.reset_index(drop=True) -431 fig, ax1 = plt.subplots(figsize=(18, 18)) -432 ax1.invert_yaxis() -433 # Dynamically set y-axis limits based on depth data -434 max_depth = df_filtered['depth_00'].max() -435 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards -436 color_salinity = 'tab:blue' -437 ax1.scatter(df_filtered['salinity_00'], df_filtered['depth_00'], color=color_salinity, -438 label='Practical Salinity') -439 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) -440 ax1.set_ylabel('Depth (m)') -441 ax1.tick_params(axis='x', labelcolor=color_salinity) -442 ax2 = ax1.twiny() -443 color_density = 'tab:red' -444 ax2.scatter(df_filtered['density'], df_filtered['depth_00'], color=color_density, label='Density (kg/m^3)') -445 ax2.set_xlabel('Density (kg/m^3)', color=color_density) -446 ax2.tick_params(axis='x', labelcolor=color_density) -447 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) -448 mld_cols = [] -449 for col in df.columns: -450 if 'MLD' in col and 'Actual' not in col: -451 mld_cols.append(df[col]) -452 refdepth_cols = [] -453 for col in df.columns: -454 if 'Actual' in col: -455 refdepth_cols.append(df[col]) -456 for idx, mld_col in enumerate(mld_cols): -457 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', -458 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') -459 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', -460 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) -461 if df_filtered['overturn'].any(): -462 plt.title( -463 f"{filename}\n Depth vs. Salinity and Density " -464 f"\n THIS IS AN UNSTABLE WATER COLUMN " -465 f"\n(Higher density fluid lies above lower density fluid)") -466 else: -467 plt.title( -468 f"{filename}\n Depth vs. Salinity and Density " -469 f"\n THIS IS AN UNSTABLE WATER COLUMN " -470 f"\n(Higher density fluid lies above lower density fluid)") -471 ax1.grid(True) -472 lines, labels = ax1.get_legend_handles_labels() -473 ax2_legend = ax2.get_legend_handles_labels() -474 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='upper center', bbox_to_anchor=(0.5, -0.15), -475 ncol=3) -476 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes.png") -477 plot_folder = os.path.join(self._cwd, "plots") -478 if not (os.path.isdir(plot_folder)): -479 os.mkdir(plot_folder) -480 plt.savefig(plot_path) -481 plt.close(fig) + 419 def plot_depth_density_salinity_mld_scatter(self): +420 """ +421 Generates a scatter plot of depth vs. salinity and density. +422 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. +423 Saves the plot as an image file. 
+424 """ +425 df = self._ctd_array.copy() +426 filename = self._filename +427 plt.rcParams.update({'font.size': 16}) +428 df_filtered = df +429 if df_filtered.empty: +430 plt.close() +431 return +432 df_filtered = df_filtered.reset_index(drop=True) +433 fig, ax1 = plt.subplots(figsize=(18, 18)) +434 ax1.invert_yaxis() +435 # Dynamically set y-axis limits based on depth data +436 max_depth = df_filtered['depth_00'].max() +437 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards +438 color_salinity = 'tab:blue' +439 ax1.scatter(df_filtered['salinity_00'], df_filtered['depth_00'], color=color_salinity, +440 label='Practical Salinity') +441 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) +442 ax1.set_ylabel('Depth (m)') +443 ax1.tick_params(axis='x', labelcolor=color_salinity) +444 ax2 = ax1.twiny() +445 color_density = 'tab:red' +446 ax2.scatter(df_filtered['density'], df_filtered['depth_00'], color=color_density, label='Density (kg/m^3)') +447 ax2.set_xlabel('Density (kg/m^3)', color=color_density) +448 ax2.tick_params(axis='x', labelcolor=color_density) +449 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) +450 mld_cols = [] +451 for col in df.columns: +452 if 'MLD' in col and 'Actual' not in col: +453 mld_cols.append(df[col]) +454 refdepth_cols = [] +455 for col in df.columns: +456 if 'Actual' in col: +457 refdepth_cols.append(df[col]) +458 for idx, mld_col in enumerate(mld_cols): +459 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', +460 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') +461 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', +462 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) +463 if df_filtered['overturn'].any(): +464 plt.title( +465 f"{filename}\n Depth vs. Salinity and Density " +466 f"\n THIS IS AN UNSTABLE WATER COLUMN " +467 f"\n(Higher density fluid lies above lower density fluid)") +468 else: +469 plt.title( +470 f"{filename}\n Depth vs. Salinity and Density " +471 f"\n THIS IS AN UNSTABLE WATER COLUMN " +472 f"\n(Higher density fluid lies above lower density fluid)") +473 ax1.grid(True) +474 lines, labels = ax1.get_legend_handles_labels() +475 ax2_legend = ax2.get_legend_handles_labels() +476 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='upper center', bbox_to_anchor=(0.5, -0.15), +477 ncol=3) +478 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes.png") +479 plot_folder = os.path.join(self._cwd, "plots") +480 if not (os.path.isdir(plot_folder)): +481 os.mkdir(plot_folder) +482 plt.savefig(plot_path) +483 plt.close(fig) @@ -3346,64 +3367,64 @@ Parameters
419 def plot_depth_density_salinity_mld_scatter(self): +420 """ +421 Generates a scatter plot of depth vs. salinity and density. +422 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. +423 Saves the plot as an image file. +424 """ +425 df = self._ctd_array.copy() +426 filename = self._filename +427 plt.rcParams.update({'font.size': 16}) +428 df_filtered = df +429 if df_filtered.empty: +430 plt.close() +431 return +432 df_filtered = df_filtered.reset_index(drop=True) +433 fig, ax1 = plt.subplots(figsize=(18, 18)) +434 ax1.invert_yaxis() +435 # Dynamically set y-axis limits based on depth data +436 max_depth = df_filtered['depth_00'].max() +437 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards +438 color_salinity = 'tab:blue' +439 ax1.scatter(df_filtered['salinity_00'], df_filtered['depth_00'], color=color_salinity, +440 label='Practical Salinity') +441 ax1.set_xlabel('Practical Salinity (PSU)', color=color_salinity) +442 ax1.set_ylabel('Depth (m)') +443 ax1.tick_params(axis='x', labelcolor=color_salinity) +444 ax2 = ax1.twiny() +445 color_density = 'tab:red' +446 ax2.scatter(df_filtered['density'], df_filtered['depth_00'], color=color_density, label='Density (kg/m^3)') +447 ax2.set_xlabel('Density (kg/m^3)', color=color_density) +448 ax2.tick_params(axis='x', labelcolor=color_density) +449 ax2.xaxis.set_major_formatter(ScalarFormatter(useOffset=False)) +450 mld_cols = [] +451 for col in df.columns: +452 if 'MLD' in col and 'Actual' not in col: +453 mld_cols.append(df[col]) +454 refdepth_cols = [] +455 for col in df.columns: +456 if 'Actual' in col: +457 refdepth_cols.append(df[col]) +458 for idx, mld_col in enumerate(mld_cols): +459 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', +460 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') +461 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', +462 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) +463 if df_filtered['overturn'].any(): +464 plt.title( +465 f"{filename}\n Depth vs. Salinity and Density " +466 f"\n THIS IS AN UNSTABLE WATER COLUMN " +467 f"\n(Higher density fluid lies above lower density fluid)") +468 else: +469 plt.title( +470 f"{filename}\n Depth vs. Salinity and Density " +471 f"\n THIS IS AN UNSTABLE WATER COLUMN " +472 f"\n(Higher density fluid lies above lower density fluid)") +473 ax1.grid(True) +474 lines, labels = ax1.get_legend_handles_labels() +475 ax2_legend = ax2.get_legend_handles_labels() +476 ax1.legend(lines + ax2_legend[0], labels + ax2_legend[1], loc='upper center', bbox_to_anchor=(0.5, -0.15), +477 ncol=3) +478 plot_path = os.path.join(self._cwd, f"plots/{filename}_salinity_density_depth_plot_dual_x_axes.png") +479 plot_folder = os.path.join(self._cwd, "plots") +480 if not (os.path.isdir(plot_folder)): +481 os.mkdir(plot_folder) +482 plt.savefig(plot_path) +483 plt.close(fig)
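Both salinity/density profile plots rely on the same matplotlib idiom: a single inverted depth axis shared by two independent x-axes created with twiny(). Reduced to its essentials, with arbitrary synthetic profiles:

```python
import numpy as np
import matplotlib.pyplot as plt

depth = np.linspace(0, 50, 100)
salinity = 33 + 0.02 * depth
density = 1026 + 0.03 * depth

fig, ax1 = plt.subplots()
ax1.invert_yaxis()  # oceanographic convention: depth increases downward
ax1.scatter(salinity, depth, color='tab:blue')
ax1.set_xlabel('Practical Salinity (PSU)', color='tab:blue')
ax1.set_ylabel('Depth (m)')

ax2 = ax1.twiny()  # second x-axis sharing the same depth axis
ax2.scatter(density, depth, color='tab:red')
ax2.set_xlabel('Density (kg/m^3)', color='tab:red')
fig.savefig('dual_axis_profile.png')
```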
483 def plot_depth_temperature_scatter(self): -484 """ -485 Generates a scatter plot of depth vs. temperature. -486 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. -487 Saves the plot as an image file. -488 """ -489 df = self._ctd_array.copy() -490 filename = self._filename -491 plt.rcParams.update({'font.size': 16}) -492 df_filtered = df -493 if df_filtered.empty: -494 plt.close() -495 return -496 df_filtered = df_filtered.reset_index(drop=True) -497 fig, ax1 = plt.subplots(figsize=(18, 18)) -498 ax1.invert_yaxis() -499 # Dynamically set y-axis limits based on depth data -500 max_depth = df_filtered['depth_00'].max() -501 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards -502 -503 color_temp = 'tab:blue' -504 ax1.scatter(df_filtered['temperature_00'], df_filtered['depth_00'], color=color_temp, -505 label="Temperature (°C)") -506 ax1.set_xlabel("Temperature (°C)", color=color_temp) -507 ax1.set_ylabel('Depth (m)') -508 ax1.tick_params(axis='x', labelcolor=color_temp) -509 mld_cols = [] -510 for col in df.columns: -511 if "MLD" in col and "Actual" not in col: -512 mld_cols.append(df[col]) -513 refdepth_cols = [] -514 for col in df.columns: -515 if "Reference Depth" in col: -516 refdepth_cols.append(df[col]) -517 for idx, mld_col in enumerate(mld_cols): -518 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', -519 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') -520 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', -521 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) -522 if df_filtered['overturn'].any(): -523 plt.title( -524 f"{filename}\n Depth vs. Temperature \n " -525 f"THIS IS AN UNSTABLE WATER COLUMN \n" -526 f"(Higher density fluid lies above lower density fluid)") -527 else: -528 plt.title( -529 f"{filename}\n Depth vs. Temperature \n " -530 f"THIS IS AN UNSTABLE WATER COLUMN \n" -531 f"(Higher density fluid lies above lower density fluid)") -532 ax1.grid(True) -533 lines, labels = ax1.get_legend_handles_labels() -534 ax1.legend(lines, labels, loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=3) -535 plot_path = os.path.join(self._cwd, f"plots/{filename}_temperature_depth_plot.png") -536 plot_folder = os.path.join(self._cwd, "plots") -537 if not (os.path.isdir(plot_folder)): -538 os.mkdir(plot_folder) -539 plt.savefig(plot_path) -540 plt.close(fig) + 485 def plot_depth_temperature_scatter(self): +486 """ +487 Generates a scatter plot of depth vs. temperature. +488 Adds horizontal lines indicating the mixed layer depth (MLD) at different reference depths. +489 Saves the plot as an image file. 
+490 """ +491 df = self._ctd_array.copy() +492 filename = self._filename +493 plt.rcParams.update({'font.size': 16}) +494 df_filtered = df +495 if df_filtered.empty: +496 plt.close() +497 return +498 df_filtered = df_filtered.reset_index(drop=True) +499 fig, ax1 = plt.subplots(figsize=(18, 18)) +500 ax1.invert_yaxis() +501 # Dynamically set y-axis limits based on depth data +502 max_depth = df_filtered['depth_00'].max() +503 ax1.set_ylim([max_depth, 0]) # Assuming depth increases downwards +504 +505 color_temp = 'tab:blue' +506 ax1.scatter(df_filtered['temperature_00'], df_filtered['depth_00'], color=color_temp, +507 label="Temperature (°C)") +508 ax1.set_xlabel("Temperature (°C)", color=color_temp) +509 ax1.set_ylabel('Depth (m)') +510 ax1.tick_params(axis='x', labelcolor=color_temp) +511 mld_cols = [] +512 for col in df.columns: +513 if "MLD" in col and "Actual" not in col: +514 mld_cols.append(df[col]) +515 refdepth_cols = [] +516 for col in df.columns: +517 if "Reference Depth" in col: +518 refdepth_cols.append(df[col]) +519 for idx, mld_col in enumerate(mld_cols): +520 ax1.axhline(y=mld_col.iloc[0], color='green', linestyle='--', +521 label=f'MLD {refdepth_cols[idx].iloc[0]} Ref') +522 ax1.text(0.95, mld_col.iloc[0], f'MLD with respect to {refdepth_cols[idx].iloc[0]}m', va='center', +523 ha='right', backgroundcolor='white', color='green', transform=ax1.get_yaxis_transform()) +524 if df_filtered['overturn'].any(): +525 plt.title( +526 f"{filename}\n Depth vs. Temperature \n " +527 f"THIS IS AN UNSTABLE WATER COLUMN \n" +528 f"(Higher density fluid lies above lower density fluid)") +529 else: +530 plt.title( +531 f"{filename}\n Depth vs. Temperature \n " +532 f"THIS IS AN UNSTABLE WATER COLUMN \n" +533 f"(Higher density fluid lies above lower density fluid)") +534 ax1.grid(True) +535 lines, labels = ax1.get_legend_handles_labels() +536 ax1.legend(lines, labels, loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=3) +537 plot_path = os.path.join(self._cwd, f"plots/{filename}_temperature_depth_plot.png") +538 plot_folder = os.path.join(self._cwd, "plots") +539 if not (os.path.isdir(plot_folder)): +540 os.mkdir(plot_folder) +541 plt.savefig(plot_path) +542 plt.close(fig) @@ -3426,341 +3447,346 @@ Parameters
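Taken together, the methods above support an end-to-end single-cast workflow. A sketch using only methods documented in this file (the .rsk path is hypothetical, and a derived density column is assumed to exist before add_mld is called; the method that derives it is outside this hunk):

```python
ctd = CTD("example_20230315_1000.rsk")  # hypothetical RSK file
ctd.remove_timezone_indicator()
ctd.add_location_to_table()
ctd.remove_upcasts()
ctd.remove_non_positive_samples()
ctd.add_absolute_salinity()
ctd.add_mld(reference=10)  # adds 'MLD 10' and 'MLD 10 Actual Reference Depth'
ctd.plot_depth_salinity_density_mld_line()
ctd.plot_depth_temperature_scatter()
ctd.save_to_csv("ctd_outputs.csv")
```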
@@ -3426,341 +3447,346 @@ Parameters
 542 class Utility:
-543 """
-544 Utility
-545 --------
-546 Utility class for CTD data processing.
-547
-548 Attributes
-549 ----------
-550 filename : str
-551 Filename of the RSK file.
-552 mastersheet : str
-553 Path to the master sheet Excel file.
-554 """
-555
-556 def __init__(self, filename):
-557 """
-558 Initialize a new Utility object.
-559 Parameters
-560 ----------
-561 filename : str
-562 The filename of the RSK file.
-563 """
-564 self.filename = filename
-565 self.mastersheet = os.path.join(_get_cwd(), CTD.master_sheet_path)
-566
-567 def no_values_in_object(self, object_to_check):
-568 """
-569 Checks if the given object is None, empty, or has a length greater than 0.
-570 Returns True if the object has no values, False otherwise.
-571
-572 Parameters
-573 ----------
-574 object_to_check : object
-575 The object to check for values.
-576 Returns
-577 --------
-578 bool
-579 True if the object has no values, False otherwise.
-580 """
-581 if isinstance(object_to_check, type(None)):
-582 return True
-583 if object_to_check.empty:
+544 class Utility:
+545 """
+546 Utility
+547 --------
+548 Utility class for CTD data processing.
+549
+550 Attributes
+551 ----------
+552 filename : str
+553 Filename of the RSK file.
+554 mastersheet : str
+555 Path to the master sheet Excel file.
+556 """
+557
+558 def __init__(self, filename):
+559 """
+560 Initialize a new Utility object.
+561 Parameters
+562 ----------
+563 filename : str
+564 The filename of the RSK file.
+565 """
+566 self.filename = filename
+567 self.mastersheet = os.path.join(_get_cwd(), CTD.master_sheet_path)
+568
+569 def no_values_in_object(self, object_to_check):
+570 """
+571 Checks whether the given object is None or contains no values.
+572 Returns True if the object has no values, False otherwise.
+573
+574 Parameters
+575 ----------
+576 object_to_check : object
+577 The object to check for values.
+578 Returns
+579 --------
+580 bool
+581 True if the object has no values, False otherwise.
+582 """
+583 if isinstance(object_to_check, type(None)):
 584 return True
-607 """ -608 -609 def get_date_from_string(filename): -610 try: -611 year = filename.split('_')[1][:4] -612 month = filename.split('_')[1][4:6] -613 day = filename.split('_')[1][6:] -614 hour = filename.split('_')[2][0:2] -615 minute = filename.split('_')[2][2:4] -616 time = f"{hour}:{minute}" -617 return float(year), float(month), float(day), time -618 except: -619 return None, None, None, None -620 -621 # Function to calculate the absolute difference between two dates -622 def date_difference(row, target_year, target_month, target_day): -623 return abs(row['year'] - target_year) + abs(row['month'] - target_month) + abs( -624 row['day'] - target_day) -625 -626 # Function to calculate the absolute difference between two times -627 def time_difference(target_time, df_time): -628 df_time_str = str(df_time) -629 target_hour, target_minute = [int(target_time.split(':')[0]), int(target_time.split(':')[1])] -630 try: -631 df_hour, df_minute = [int(df_time_str.split(':')[0]), int(df_time_str.split(':')[1])] -632 except: -633 return None -634 return abs((target_hour * 60 + target_minute) - (df_hour * 60 + df_minute)) -635 -636 # Load the master sheet -637 master_df = pd.read_excel(master_sheet_path) -638 # Get date and time components from the filename -639 year, month, day, time = get_date_from_string(filename) -640 if year is None: -641 return -642 # Calculate absolute differences for each row in 'master_df' -643 master_df['date_difference'] = master_df.apply(date_difference, args=(year, month, day), axis=1) -644 master_df['time_difference'] = master_df['time_local'].apply(lambda x: time_difference(time, x)) -645 # Find the rows with the smallest total difference for date -646 smallest_date_difference = master_df['date_difference'].min() -647 closest_date_rows = master_df[master_df['date_difference'] == smallest_date_difference] -648 # Check if time_difference returns None -649 if closest_date_rows['time_difference'].isnull().any(): -650 closest_time_time = None -651 closest_row_overall = closest_date_rows.iloc[0] -652 else: -653 # If there are multiple rows with the smallest date difference, select the row with the smallest time difference -654 if len(closest_date_rows) > 1: -655 closest_time_row = closest_date_rows.loc[closest_date_rows['time_difference'].idxmin()] -656 closest_row_overall = closest_time_row -657 closest_time_time = closest_row_overall['time_local'] -658 else: -659 closest_row_overall = closest_date_rows.iloc[0] -660 closest_time_time = closest_row_overall['time_local'] -661 latitude = closest_row_overall['latitude'] -662 longitude = closest_row_overall['longitude'] -663 unique_id = closest_row_overall.iloc[0] -664 RBRfilename = filename + "_gpscm" -665 # Access the closest date components -666 closest_date_year = closest_row_overall['year'] -667 closest_date_month = closest_row_overall['month'] -668 closest_date_day = closest_row_overall['day'] -669 # Print the closest date and time -670 print("|-ESTIMATION ALERT-|") -671 print("Had to guess location on file: " + filename) -672 print("Unique ID: " + unique_id) -673 print("Closest Date (Year, Month, Day):", closest_date_year, closest_date_month, closest_date_day) -674 print("Lat: " + str(latitude)) -675 print("Long: " + str(longitude)) -676 if closest_time_time: -677 print("Closest Time:", closest_time_time) -678 print("====================") -679 return latitude, longitude, RBRfilename -680 -681 def get_sample_location(self, rsk, filename): -682 """ -683 Retrieves the sample location data from the RSK file. 
If no location data is found,
-684 it attempts to estimate the location using the master sheet. Returns the latitude,
-685 longitude, and updated filename.
-686
-687 Parameters
-688 ----------
-689 rsk : RSK
-690 Ruskin object of the RSK file.
-691 filename : str
-692 The filename of the RSK file.
+585 if object_to_check.empty:
+586 return True
+587 # Sized objects with no rows also count as having no values
+588 return len(object_to_check) == 0
+589
+590 def process_master_sheet(self, master_sheet_path, filename):
+591 """
+592 Extracts the date and time components from the filename and compares them with the data
+593 in the master sheet. Calculates the absolute differences between the dates and times to
+594 find the closest match. Returns the estimated latitude, longitude, and updated filename
+595 based on the closest match.
+596
+597 Parameters
+598 ----------
+599 master_sheet_path : str
+600 The path to the master sheet Excel file.
+601
+602 filename : str
+603 The filename of the RSK file.
+604
+605 Returns
+606 -------
+607 tuple
+608 A tuple containing the estimated latitude, longitude, and updated filename.
+609 """
+610
+611 def get_date_from_string(filename):
+612 try:
+613 year = filename.split('_')[1][:4]
+614 month = filename.split('_')[1][4:6]
+615 day = filename.split('_')[1][6:]
+616 hour = filename.split('_')[2][0:2]
+617 minute = filename.split('_')[2][2:4]
+618 time = f"{hour}:{minute}"
+619 return float(year), float(month), float(day), time
+620 except:
+621 return None, None, None, None
+622
+623 # Function to calculate the absolute difference between two dates
+624 def date_difference(row, target_year, target_month, target_day):
+625 return abs(row['year'] - target_year) + abs(row['month'] - target_month) + abs(
+626 row['day'] - target_day)
+627
+628 # Function to calculate the absolute difference between two times
+629 def time_difference(target_time, df_time):
+630 df_time_str = str(df_time)
+631 target_hour, target_minute = [int(target_time.split(':')[0]), int(target_time.split(':')[1])]
+632 try:
+633 df_hour, df_minute = [int(df_time_str.split(':')[0]), int(df_time_str.split(':')[1])]
+634 except:
+635 return None
+636 return abs((target_hour * 60 + target_minute) - (df_hour * 60 + df_minute))
+637
+638 # Check if the master sheet is already cached
+639 if CTD._cached_master_sheet is None:
+640 # Load the master sheet and cache it
+641 CTD._cached_master_sheet = pd.read_excel(master_sheet_path)
+642
+643 # Use the cached master sheet data
+644 master_df = CTD._cached_master_sheet.copy()
+645 # Get date and time components from the filename
+646 year, month, day, time = get_date_from_string(filename)
+647 if year is None:
+648 return None, None, filename # keep the (latitude, longitude, filename) contract
+649 # Calculate absolute differences for each row in 'master_df'
+650 master_df['date_difference'] = master_df.apply(date_difference, args=(year, month, day), axis=1)
+651 master_df['time_difference'] = master_df['time_local'].apply(lambda x: time_difference(time, x))
+652 # Find the rows with the smallest total difference for date
+653 smallest_date_difference = master_df['date_difference'].min()
+654 closest_date_rows = master_df[master_df['date_difference'] == smallest_date_difference]
+655 # Check if time_difference returns None
+656 if closest_date_rows['time_difference'].isnull().any():
+657 closest_time_time = None
+658 closest_row_overall = closest_date_rows.iloc[0]
+659 else:
+660 # If there are multiple rows with the smallest date difference, select the row with the smallest time difference
+661 if len(closest_date_rows) > 1:
+662 closest_time_row =
closest_date_rows.loc[closest_date_rows['time_difference'].idxmin()] +663 closest_row_overall = closest_time_row +664 closest_time_time = closest_row_overall['time_local'] +665 else: +666 closest_row_overall = closest_date_rows.iloc[0] +667 closest_time_time = closest_row_overall['time_local'] +668 latitude = closest_row_overall['latitude'] +669 longitude = closest_row_overall['longitude'] +670 unique_id = closest_row_overall.iloc[0] +671 RBRfilename = filename + "_gpscm" +672 # Access the closest date components +673 closest_date_year = closest_row_overall['year'] +674 closest_date_month = closest_row_overall['month'] +675 closest_date_day = closest_row_overall['day'] +676 # Print the closest date and time +677 print("|-ESTIMATION ALERT-|") +678 print("Had to guess location on file: " + filename) +679 print("Unique ID: " + unique_id) +680 print("Closest Date (Year, Month, Day):", closest_date_year, closest_date_month, closest_date_day) +681 print("Lat: " + str(latitude)) +682 print("Long: " + str(longitude)) +683 if closest_time_time: +684 print("Closest Time:", closest_time_time) +685 print("====================") +686 return latitude, longitude, RBRfilename +687 +688 def get_sample_location(self, rsk, filename): +689 """ +690 Retrieves the sample location data from the RSK file. If no location data is found, +691 it attempts to estimate the location using the master sheet. Returns the latitude, +692 longitude, and updated filename. 693 -694 Returns -695 ------- -696 tuple -697 A tuple containing the latitude associated with the sample, longitude associated with the sample, -698 and the filename, adds _gps if the location was in the ruskin file, -699 _gpscm if located via mastersheet, or _gpserror if unable to locate. -700 """ -701 # Adding geo data, assumes no drift and uses the first lat long in the file if there is one -702 geo_data_length = len(list(itertools.islice(rsk.geodata(), None))) -703 if geo_data_length < 1: -704 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( -705 self.mastersheet, filename) -706 return latitude_intermediate, longitude_intermediate, filename -707 else: -708 for geo in itertools.islice(rsk.geodata(), None): -709 # Is there geo data? -710 if geo.latitude is not None: -711 # If there is, is it from the southern ocean? -712 if not (geo.latitude > -60): -713 try: -714 latitude_intermediate = geo.latitude[0] -715 longitude_intermediate = geo.longitude[0] -716 filename += "_gps" -717 return latitude_intermediate, longitude_intermediate, filename -718 except: -719 latitude_intermediate = geo.latitude -720 longitude_intermediate = geo.longitude -721 filename += "_gps" -722 return latitude_intermediate, longitude_intermediate, filename -723 else: -724 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( -725 self.mastersheet, filename) -726 return latitude_intermediate, longitude_intermediate, filename -727 else: -728 return None, None, filename + 'gpserror' -729 -730 def remove_sample_timezone_indicator(self, df): -731 """ -732 Removes the timezone indicator (e.g., '+00:00') from the 'timestamp' column of the -733 given DataFrame. Returns the updated DataFrame. -734 -735 Parameters -736 ---------- -737 df : DataFrame -738 The DataFrame to process. -739 -740 Returns -741 ------- -742 DataFrame -743 The updated DataFrame with the timezone indicator removed. 
-744 """ -745 if self.no_values_in_object(df): -746 return None -747 if 'timestamp' in df.columns: -748 df['timestamp'] = df['timestamp'].astype(str).str.split('+').str[0] -749 return df -750 else: -751 return df -752 -753 def remove_rows_with_negative_depth(self, df): -754 """ -755 Removes rows from the given DataFrame where the 'depth_00' column has negative values. -756 Returns the updated DataFrame. -757 -758 Parameter -759 --------- -760 df : DataFrame -761 The DataFrame to process. -762 -763 Returns -764 ------- -765 DataFrame -766 The updated DataFrame with rows containing negative depth values removed. -767 """ -768 if self.no_values_in_object(df): -769 return None -770 if 'depth_00' in df.columns: -771 df = df[df['depth_00'] >= 0].reset_index(drop=True) -772 else: -773 return None -774 if self.no_values_in_object(df): -775 return None -776 return df.copy() -777 -778 def remove_rows_with_negative_salinity(self, df): -779 """ -780 Removes rows from the given DataFrame where the 'salinity_00' column has negative values. -781 Returns the updated DataFrame. -782 -783 Parameters -784 ---------- -785 df: DataFrame -786 The DataFrame to process. -787 -788 Returns -789 ------- -790 DataFrame -791 The updated DataFrame with rows containing negative salinity values removed. -792 """ -793 if self.no_values_in_object(df): -794 return None -795 if 'salinity_00' in df.columns: -796 df = df[df['salinity_00'] >= 0].reset_index(drop=True) -797 else: -798 return None -799 if self.no_values_in_object(df): -800 return None -801 return df.copy() -802 -803 def remove_rows_with_negative_pressure(self, df): -804 """ -805 Removes rows from the given DataFrame where the 'pressure_00' column has negative values. -806 Returns the updated DataFrame. -807 -808 Parameters -809 ---------- -810 df: DataFrame -811 The DataFrame to process. -812 -813 Returns -814 ------- -815 DataFrame -816 The updated DataFrame with rows containing negative pressure values removed. -817 """ -818 if self.no_values_in_object(df): -819 return None -820 if 'pressure_00' in df.columns: -821 df = df[df['pressure_00'] >= 0].reset_index(drop=True) -822 else: -823 return None -824 if self.no_values_in_object(df): -825 return None -826 return df.copy() -827 -828 def remove_rows_with_negative_salinityabs(self, df): -829 """ -830 Removes rows from the given DataFrame where the 'salinityabs' column has negative values. -831 Returns the updated DataFrame. -832 -833 Parameters -834 ---------- -835 df: DataFrame -836 The DataFrame to process. -837 -838 Returns -839 ------- -840 DataFrame -841 The updated DataFrame with rows containing negative absolute salinity values removed. -842 """ -843 if self.no_values_in_object(df): -844 return None -845 if 'salinityabs' in df.columns: -846 df = df[df['salinityabs'] >= 0].reset_index(drop=True) -847 else: -848 return None -849 if self.no_values_in_object(df): -850 return None -851 return df.copy() -852 -853 def remove_rows_with_negative_density(self, df): -854 """ -855 Removes rows from the given DataFrame where the 'density' column has negative values. -856 Returns the updated DataFrame. -857 -858 Parameters -859 ---------- -860 df: DataFrame -861 The DataFrame to process. -862 -863 Returns -864 ------- -865 DataFrame -866 The updated DataFrame with rows containing negative density values removed. 
-867 """ -868 if self.no_values_in_object(df): -869 return None -870 if 'density' in df.columns: -871 df = df[df['density'] >= 0].reset_index(drop=True) -872 else: -873 return None -874 if self.no_values_in_object(df): -875 return None -876 return df.copy() +694 Parameters +695 ---------- +696 rsk : RSK +697 Ruskin object of the RSK file. +698 filename : str +699 The filename of the RSK file. +700 +701 Returns +702 ------- +703 tuple +704 A tuple containing the latitude associated with the sample, longitude associated with the sample, +705 and the filename, adds _gps if the location was in the ruskin file, +706 _gpscm if located via mastersheet, or _gpserror if unable to locate. +707 """ +708 # Adding geo data, assumes no drift and uses the first lat long in the file if there is one +709 geo_data_length = len(list(itertools.islice(rsk.geodata(), None))) +710 if geo_data_length < 1: +711 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( +712 self.mastersheet, filename) +713 return latitude_intermediate, longitude_intermediate, filename +714 else: +715 for geo in itertools.islice(rsk.geodata(), None): +716 # Is there geo data? +717 if geo.latitude is not None: +718 # If there is, is it from the southern ocean? +719 if not (geo.latitude > -60): +720 try: +721 latitude_intermediate = geo.latitude[0] +722 longitude_intermediate = geo.longitude[0] +723 filename += "_gps" +724 return latitude_intermediate, longitude_intermediate, filename +725 except: +726 latitude_intermediate = geo.latitude +727 longitude_intermediate = geo.longitude +728 filename += "_gps" +729 return latitude_intermediate, longitude_intermediate, filename +730 else: +731 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( +732 self.mastersheet, filename) +733 return latitude_intermediate, longitude_intermediate, filename +734 else: +735 return None, None, filename + 'gpserror' +736 +737 def remove_sample_timezone_indicator(self, df): +738 """ +739 Removes the timezone indicator (e.g., '+00:00') from the 'timestamp' column of the +740 given DataFrame. Returns the updated DataFrame. +741 +742 Parameters +743 ---------- +744 df : DataFrame +745 The DataFrame to process. +746 +747 Returns +748 ------- +749 DataFrame +750 The updated DataFrame with the timezone indicator removed. +751 """ +752 if self.no_values_in_object(df): +753 return None +754 if 'timestamp' in df.columns: +755 df['timestamp'] = df['timestamp'].astype(str).str.split('+').str[0] +756 return df +757 else: +758 return df +759 +760 def remove_rows_with_negative_depth(self, df): +761 """ +762 Removes rows from the given DataFrame where the 'depth_00' column has negative values. +763 Returns the updated DataFrame. +764 +765 Parameter +766 --------- +767 df : DataFrame +768 The DataFrame to process. +769 +770 Returns +771 ------- +772 DataFrame +773 The updated DataFrame with rows containing negative depth values removed. +774 """ +775 if self.no_values_in_object(df): +776 return None +777 if 'depth_00' in df.columns: +778 df = df[df['depth_00'] >= 0].reset_index(drop=True) +779 else: +780 return None +781 if self.no_values_in_object(df): +782 return None +783 return df.copy() +784 +785 def remove_rows_with_negative_salinity(self, df): +786 """ +787 Removes rows from the given DataFrame where the 'salinity_00' column has negative values. +788 Returns the updated DataFrame. +789 +790 Parameters +791 ---------- +792 df: DataFrame +793 The DataFrame to process. 
+794 +795 Returns +796 ------- +797 DataFrame +798 The updated DataFrame with rows containing negative salinity values removed. +799 """ +800 if self.no_values_in_object(df): +801 return None +802 if 'salinity_00' in df.columns: +803 df = df[df['salinity_00'] >= 0].reset_index(drop=True) +804 else: +805 return None +806 if self.no_values_in_object(df): +807 return None +808 return df.copy() +809 +810 def remove_rows_with_negative_pressure(self, df): +811 """ +812 Removes rows from the given DataFrame where the 'pressure_00' column has negative values. +813 Returns the updated DataFrame. +814 +815 Parameters +816 ---------- +817 df: DataFrame +818 The DataFrame to process. +819 +820 Returns +821 ------- +822 DataFrame +823 The updated DataFrame with rows containing negative pressure values removed. +824 """ +825 if self.no_values_in_object(df): +826 return None +827 if 'pressure_00' in df.columns: +828 df = df[df['pressure_00'] >= 0].reset_index(drop=True) +829 else: +830 return None +831 if self.no_values_in_object(df): +832 return None +833 return df.copy() +834 +835 def remove_rows_with_negative_salinityabs(self, df): +836 """ +837 Removes rows from the given DataFrame where the 'salinityabs' column has negative values. +838 Returns the updated DataFrame. +839 +840 Parameters +841 ---------- +842 df: DataFrame +843 The DataFrame to process. +844 +845 Returns +846 ------- +847 DataFrame +848 The updated DataFrame with rows containing negative absolute salinity values removed. +849 """ +850 if self.no_values_in_object(df): +851 return None +852 if 'salinityabs' in df.columns: +853 df = df[df['salinityabs'] >= 0].reset_index(drop=True) +854 else: +855 return None +856 if self.no_values_in_object(df): +857 return None +858 return df.copy() +859 +860 def remove_rows_with_negative_density(self, df): +861 """ +862 Removes rows from the given DataFrame where the 'density' column has negative values. +863 Returns the updated DataFrame. +864 +865 Parameters +866 ---------- +867 df: DataFrame +868 The DataFrame to process. +869 +870 Returns +871 ------- +872 DataFrame +873 The updated DataFrame with rows containing negative density values removed. +874 """ +875 if self.no_values_in_object(df): +876 return None +877 if 'density' in df.columns: +878 df = df[df['density'] >= 0].reset_index(drop=True) +879 else: +880 return None +881 if self.no_values_in_object(df): +882 return None +883 return df.copy() @@ -3789,16 +3815,16 @@ Attributes
544 class Utility: +545 """ +546 Utility +547 -------- +548 Utility class for CTD data processing. +549 +550 Attributes +551 ---------- +552 filename : str +553 Filename of the RSK file. +554 mastersheet : str +555 Path to the master sheet Excel file. +556 """ +557 +558 def __init__(self, filename): +559 """ +560 Initialize a new Utility object. +561 Parameters +562 ---------- +563 filename : str +564 The filename of the RSK file. +565 """ +566 self.filename = filename +567 self.mastersheet = os.path.join(_get_cwd(), CTD.master_sheet_path) +568 +569 def no_values_in_object(self, object_to_check): +570 """ +571 Checks if the given object is None, empty, or has a length greater than 0. +572 Returns True if the object has no values, False otherwise. +573 +574 Parameters +575 ---------- +576 object_to_check : object +577 The object to check for values. +578 Returns +579 -------- +580 bool +581 True if the object has no values, False otherwise. +582 """ +583 if isinstance(object_to_check, type(None)): 584 return True -585 if len(object_to_check) > 0: -586 return False -587 -588 def process_master_sheet(self, master_sheet_path, filename): -589 """ -590 Extracts the date and time components from the filename and compares them with the data -591 in the master sheet. Calculates the absolute differences between the dates and times to -592 find the closest match. Returns the estimated latitude, longitude, and updated filename -593 based on the closest match. -594 -595 Parameters -596 ---------- -597 master_sheet_path : str -598 The path to the master sheet Excel file. -599 -600 filename : str -601 The filename of the RSK file. -602 -603 Returns -604 ------- -605 tuple -606 A tuple containing the estimated latitude, longitude, and updated filename. -607 """ -608 -609 def get_date_from_string(filename): -610 try: -611 year = filename.split('_')[1][:4] -612 month = filename.split('_')[1][4:6] -613 day = filename.split('_')[1][6:] -614 hour = filename.split('_')[2][0:2] -615 minute = filename.split('_')[2][2:4] -616 time = f"{hour}:{minute}" -617 return float(year), float(month), float(day), time -618 except: -619 return None, None, None, None -620 -621 # Function to calculate the absolute difference between two dates -622 def date_difference(row, target_year, target_month, target_day): -623 return abs(row['year'] - target_year) + abs(row['month'] - target_month) + abs( -624 row['day'] - target_day) -625 -626 # Function to calculate the absolute difference between two times -627 def time_difference(target_time, df_time): -628 df_time_str = str(df_time) -629 target_hour, target_minute = [int(target_time.split(':')[0]), int(target_time.split(':')[1])] -630 try: -631 df_hour, df_minute = [int(df_time_str.split(':')[0]), int(df_time_str.split(':')[1])] -632 except: -633 return None -634 return abs((target_hour * 60 + target_minute) - (df_hour * 60 + df_minute)) -635 -636 # Load the master sheet -637 master_df = pd.read_excel(master_sheet_path) -638 # Get date and time components from the filename -639 year, month, day, time = get_date_from_string(filename) -640 if year is None: -641 return -642 # Calculate absolute differences for each row in 'master_df' -643 master_df['date_difference'] = master_df.apply(date_difference, args=(year, month, day), axis=1) -644 master_df['time_difference'] = master_df['time_local'].apply(lambda x: time_difference(time, x)) -645 # Find the rows with the smallest total difference for date -646 smallest_date_difference = master_df['date_difference'].min() -647 closest_date_rows = 
master_df[master_df['date_difference'] == smallest_date_difference] -648 # Check if time_difference returns None -649 if closest_date_rows['time_difference'].isnull().any(): -650 closest_time_time = None -651 closest_row_overall = closest_date_rows.iloc[0] -652 else: -653 # If there are multiple rows with the smallest date difference, select the row with the smallest time difference -654 if len(closest_date_rows) > 1: -655 closest_time_row = closest_date_rows.loc[closest_date_rows['time_difference'].idxmin()] -656 closest_row_overall = closest_time_row -657 closest_time_time = closest_row_overall['time_local'] -658 else: -659 closest_row_overall = closest_date_rows.iloc[0] -660 closest_time_time = closest_row_overall['time_local'] -661 latitude = closest_row_overall['latitude'] -662 longitude = closest_row_overall['longitude'] -663 unique_id = closest_row_overall.iloc[0] -664 RBRfilename = filename + "_gpscm" -665 # Access the closest date components -666 closest_date_year = closest_row_overall['year'] -667 closest_date_month = closest_row_overall['month'] -668 closest_date_day = closest_row_overall['day'] -669 # Print the closest date and time -670 print("|-ESTIMATION ALERT-|") -671 print("Had to guess location on file: " + filename) -672 print("Unique ID: " + unique_id) -673 print("Closest Date (Year, Month, Day):", closest_date_year, closest_date_month, closest_date_day) -674 print("Lat: " + str(latitude)) -675 print("Long: " + str(longitude)) -676 if closest_time_time: -677 print("Closest Time:", closest_time_time) -678 print("====================") -679 return latitude, longitude, RBRfilename -680 -681 def get_sample_location(self, rsk, filename): -682 """ -683 Retrieves the sample location data from the RSK file. If no location data is found, -684 it attempts to estimate the location using the master sheet. Returns the latitude, -685 longitude, and updated filename. -686 -687 Parameters -688 ---------- -689 rsk : RSK -690 Ruskin object of the RSK file. -691 filename : str -692 The filename of the RSK file. +585 if object_to_check.empty: +586 return True +587 if len(object_to_check) > 0: +588 return False +589 +590 def process_master_sheet(self, master_sheet_path, filename): +591 """ +592 Extracts the date and time components from the filename and compares them with the data +593 in the master sheet. Calculates the absolute differences between the dates and times to +594 find the closest match. Returns the estimated latitude, longitude, and updated filename +595 based on the closest match. +596 +597 Parameters +598 ---------- +599 master_sheet_path : str +600 The path to the master sheet Excel file. +601 +602 filename : str +603 The filename of the RSK file. +604 +605 Returns +606 ------- +607 tuple +608 A tuple containing the estimated latitude, longitude, and updated filename. 
+609 """ +610 +611 def get_date_from_string(filename): +612 try: +613 year = filename.split('_')[1][:4] +614 month = filename.split('_')[1][4:6] +615 day = filename.split('_')[1][6:] +616 hour = filename.split('_')[2][0:2] +617 minute = filename.split('_')[2][2:4] +618 time = f"{hour}:{minute}" +619 return float(year), float(month), float(day), time +620 except: +621 return None, None, None, None +622 +623 # Function to calculate the absolute difference between two dates +624 def date_difference(row, target_year, target_month, target_day): +625 return abs(row['year'] - target_year) + abs(row['month'] - target_month) + abs( +626 row['day'] - target_day) +627 +628 # Function to calculate the absolute difference between two times +629 def time_difference(target_time, df_time): +630 df_time_str = str(df_time) +631 target_hour, target_minute = [int(target_time.split(':')[0]), int(target_time.split(':')[1])] +632 try: +633 df_hour, df_minute = [int(df_time_str.split(':')[0]), int(df_time_str.split(':')[1])] +634 except: +635 return None +636 return abs((target_hour * 60 + target_minute) - (df_hour * 60 + df_minute)) +637 +638 # Check if the master sheet is already cached +639 if CTD._cached_master_sheet is None: +640 # Load the master sheet and cache it +641 CTD._cached_master_sheet = pd.read_excel(master_sheet_path) +642 +643 # Use the cached master sheet data +644 master_df = CTD._cached_master_sheet.copy() +645 # Get date and time components from the filename +646 year, month, day, time = get_date_from_string(filename) +647 if year is None: +648 return None, None, filename +649 # Calculate absolute differences for each row in 'master_df' +650 master_df['date_difference'] = master_df.apply(date_difference, args=(year, month, day), axis=1) +651 master_df['time_difference'] = master_df['time_local'].apply(lambda x: time_difference(time, x)) +652 # Find the rows with the smallest total difference for date +653 smallest_date_difference = master_df['date_difference'].min() +654 closest_date_rows = master_df[master_df['date_difference'] == smallest_date_difference] +655 # Fall back to the first date match if any time_difference is None +656 if closest_date_rows['time_difference'].isnull().any(): +657 closest_time_time = None +658 closest_row_overall = closest_date_rows.iloc[0] +659 else: +660 # If there are multiple rows with the smallest 
print("====================") +686 return latitude, longitude, RBRfilename +687 +688 def get_sample_location(self, rsk, filename): +689 """ +690 Retrieves the sample location data from the RSK file. If no location data is found, +691 it attempts to estimate the location using the master sheet. Returns the latitude, +692 longitude, and updated filename. 693 -694 Returns -695 ------- -696 tuple -697 A tuple containing the latitude associated with the sample, longitude associated with the sample, -698 and the filename, adds _gps if the location was in the ruskin file, -699 _gpscm if located via mastersheet, or _gpserror if unable to locate. -700 """ -701 # Adding geo data, assumes no drift and uses the first lat long in the file if there is one -702 geo_data_length = len(list(itertools.islice(rsk.geodata(), None))) -703 if geo_data_length < 1: -704 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( -705 self.mastersheet, filename) -706 return latitude_intermediate, longitude_intermediate, filename -707 else: -708 for geo in itertools.islice(rsk.geodata(), None): -709 # Is there geo data? -710 if geo.latitude is not None: -711 # If there is, is it from the southern ocean? -712 if not (geo.latitude > -60): -713 try: -714 latitude_intermediate = geo.latitude[0] -715 longitude_intermediate = geo.longitude[0] -716 filename += "_gps" -717 return latitude_intermediate, longitude_intermediate, filename -718 except: -719 latitude_intermediate = geo.latitude -720 longitude_intermediate = geo.longitude -721 filename += "_gps" -722 return latitude_intermediate, longitude_intermediate, filename -723 else: -724 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( -725 self.mastersheet, filename) -726 return latitude_intermediate, longitude_intermediate, filename -727 else: -728 return None, None, filename + 'gpserror' -729 -730 def remove_sample_timezone_indicator(self, df): -731 """ -732 Removes the timezone indicator (e.g., '+00:00') from the 'timestamp' column of the -733 given DataFrame. Returns the updated DataFrame. -734 -735 Parameters -736 ---------- -737 df : DataFrame -738 The DataFrame to process. -739 -740 Returns -741 ------- -742 DataFrame -743 The updated DataFrame with the timezone indicator removed. -744 """ -745 if self.no_values_in_object(df): -746 return None -747 if 'timestamp' in df.columns: -748 df['timestamp'] = df['timestamp'].astype(str).str.split('+').str[0] -749 return df -750 else: -751 return df -752 -753 def remove_rows_with_negative_depth(self, df): -754 """ -755 Removes rows from the given DataFrame where the 'depth_00' column has negative values. -756 Returns the updated DataFrame. -757 -758 Parameter -759 --------- -760 df : DataFrame -761 The DataFrame to process. -762 -763 Returns -764 ------- -765 DataFrame -766 The updated DataFrame with rows containing negative depth values removed. -767 """ -768 if self.no_values_in_object(df): -769 return None -770 if 'depth_00' in df.columns: -771 df = df[df['depth_00'] >= 0].reset_index(drop=True) -772 else: -773 return None -774 if self.no_values_in_object(df): -775 return None -776 return df.copy() -777 -778 def remove_rows_with_negative_salinity(self, df): -779 """ -780 Removes rows from the given DataFrame where the 'salinity_00' column has negative values. -781 Returns the updated DataFrame. -782 -783 Parameters -784 ---------- -785 df: DataFrame -786 The DataFrame to process. 
-787 -788 Returns -789 ------- -790 DataFrame -791 The updated DataFrame with rows containing negative salinity values removed. -792 """ -793 if self.no_values_in_object(df): -794 return None -795 if 'salinity_00' in df.columns: -796 df = df[df['salinity_00'] >= 0].reset_index(drop=True) -797 else: -798 return None -799 if self.no_values_in_object(df): -800 return None -801 return df.copy() -802 -803 def remove_rows_with_negative_pressure(self, df): -804 """ -805 Removes rows from the given DataFrame where the 'pressure_00' column has negative values. -806 Returns the updated DataFrame. -807 -808 Parameters -809 ---------- -810 df: DataFrame -811 The DataFrame to process. -812 -813 Returns -814 ------- -815 DataFrame -816 The updated DataFrame with rows containing negative pressure values removed. -817 """ -818 if self.no_values_in_object(df): -819 return None -820 if 'pressure_00' in df.columns: -821 df = df[df['pressure_00'] >= 0].reset_index(drop=True) -822 else: -823 return None -824 if self.no_values_in_object(df): -825 return None -826 return df.copy() -827 -828 def remove_rows_with_negative_salinityabs(self, df): -829 """ -830 Removes rows from the given DataFrame where the 'salinityabs' column has negative values. -831 Returns the updated DataFrame. -832 -833 Parameters -834 ---------- -835 df: DataFrame -836 The DataFrame to process. -837 -838 Returns -839 ------- -840 DataFrame -841 The updated DataFrame with rows containing negative absolute salinity values removed. -842 """ -843 if self.no_values_in_object(df): -844 return None -845 if 'salinityabs' in df.columns: -846 df = df[df['salinityabs'] >= 0].reset_index(drop=True) -847 else: -848 return None -849 if self.no_values_in_object(df): -850 return None -851 return df.copy() -852 -853 def remove_rows_with_negative_density(self, df): -854 """ -855 Removes rows from the given DataFrame where the 'density' column has negative values. -856 Returns the updated DataFrame. -857 -858 Parameters -859 ---------- -860 df: DataFrame -861 The DataFrame to process. -862 -863 Returns -864 ------- -865 DataFrame -866 The updated DataFrame with rows containing negative density values removed. -867 """ -868 if self.no_values_in_object(df): -869 return None -870 if 'density' in df.columns: -871 df = df[df['density'] >= 0].reset_index(drop=True) -872 else: -873 return None -874 if self.no_values_in_object(df): -875 return None -876 return df.copy() +694 Parameters +695 ---------- +696 rsk : RSK +697 Ruskin object of the RSK file. +698 filename : str +699 The filename of the RSK file. +700 +701 Returns +702 ------- +703 tuple +704 A tuple containing the latitude and longitude associated with the sample, and the +705 filename with a suffix appended: '_gps' if the location came from the Ruskin file, +706 '_gpscm' if it was estimated from the master sheet, or '_gpserror' if no location could be found. +707 """ +708 # Add geo data; assume no drift and use the first lat/long in the file if there is one +709 geo_data_length = len(list(itertools.islice(rsk.geodata(), None))) +710 if geo_data_length < 1: +711 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( +712 self.mastersheet, filename) +713 return latitude_intermediate, longitude_intermediate, filename +714 else: +715 for geo in itertools.islice(rsk.geodata(), None): +716 # Is there geo data? +717 if geo.latitude is not None: +718 # If so, is it from the Southern Ocean (south of 60°S)? 
+719 if not (geo.latitude > -60): +720 try: +721 latitude_intermediate = geo.latitude[0] +722 longitude_intermediate = geo.longitude[0] +723 filename += "_gps" +724 return latitude_intermediate, longitude_intermediate, filename +725 except: +726 latitude_intermediate = geo.latitude +727 longitude_intermediate = geo.longitude +728 filename += "_gps" +729 return latitude_intermediate, longitude_intermediate, filename +730 else: +731 latitude_intermediate, longitude_intermediate, filename = self.process_master_sheet( +732 self.mastersheet, filename) +733 return latitude_intermediate, longitude_intermediate, filename +734 else: +735 return None, None, filename + '_gpserror' +736 +737 def remove_sample_timezone_indicator(self, df): +738 """ +739 Removes the timezone indicator (e.g., '+00:00') from the 'timestamp' column of the +740 given DataFrame. Returns the updated DataFrame. +741 +742 Parameters +743 ---------- +744 df : DataFrame +745 The DataFrame to process. +746 +747 Returns +748 ------- +749 DataFrame +750 The updated DataFrame with the timezone indicator removed. +751 """ +752 if self.no_values_in_object(df): +753 return None +754 if 'timestamp' in df.columns: +755 df['timestamp'] = df['timestamp'].astype(str).str.split('+').str[0] +756 return df +757 else: +758 return df +759 +760 def remove_rows_with_negative_depth(self, df): +761 """ +762 Removes rows from the given DataFrame where the 'depth_00' column has negative values. +763 Returns the updated DataFrame. +764 +765 Parameters +766 ---------- +767 df : DataFrame +768 The DataFrame to process. +769 +770 Returns +771 ------- +772 DataFrame +773 The updated DataFrame with rows containing negative depth values removed. +774 """ +775 if self.no_values_in_object(df): +776 return None +777 if 'depth_00' in df.columns: +778 df = df[df['depth_00'] >= 0].reset_index(drop=True) +779 else: +780 return None +781 if self.no_values_in_object(df): +782 return None +783 return df.copy() +784 +785 def remove_rows_with_negative_salinity(self, df): +786 """ +787 Removes rows from the given DataFrame where the 'salinity_00' column has negative values. +788 Returns the updated DataFrame. +789 +790 Parameters +791 ---------- +792 df : DataFrame +793 The DataFrame to process. +794 +795 Returns +796 ------- +797 DataFrame +798 The updated DataFrame with rows containing negative salinity values removed. +799 """ +800 if self.no_values_in_object(df): +801 return None +802 if 'salinity_00' in df.columns: +803 df = df[df['salinity_00'] >= 0].reset_index(drop=True) +804 else: +805 return None +806 if self.no_values_in_object(df): +807 return None +808 return df.copy() +809 +810 def remove_rows_with_negative_pressure(self, df): +811 """ +812 Removes rows from the given DataFrame where the 'pressure_00' column has negative values. +813 Returns the updated DataFrame. +814 +815 Parameters +816 ---------- +817 df : DataFrame +818 The DataFrame to process. +819 +820 Returns +821 ------- +822 DataFrame +823 The updated DataFrame with rows containing negative pressure values removed. +824 """ +825 if self.no_values_in_object(df): +826 return None +827 if 'pressure_00' in df.columns: +828 df = df[df['pressure_00'] >= 0].reset_index(drop=True) +829 else: +830 return None +831 if self.no_values_in_object(df): +832 return None +833 return df.copy() +834 +835 def remove_rows_with_negative_salinityabs(self, df): +836 """ +837 Removes rows from the given DataFrame where the 'salinityabs' column has negative values. +838 Returns the updated DataFrame. 
+839 +840 Parameters +841 ---------- +842 df : DataFrame +843 The DataFrame to process. +844 +845 Returns +846 ------- +847 DataFrame +848 The updated DataFrame with rows containing negative absolute salinity values removed. +849 """ +850 if self.no_values_in_object(df): +851 return None +852 if 'salinityabs' in df.columns: +853 df = df[df['salinityabs'] >= 0].reset_index(drop=True) +854 else: +855 return None +856 if self.no_values_in_object(df): +857 return None +858 return df.copy() +859 +860 def remove_rows_with_negative_density(self, df): +861 """ +862 Removes rows from the given DataFrame where the 'density' column has negative values. +863 Returns the updated DataFrame. +864 +865 Parameters +866 ---------- +867 df : DataFrame +868 The DataFrame to process. +869 +870 Returns +871 ------- +872 DataFrame +873 The updated DataFrame with rows containing negative density values removed. +874 """ +875 if self.no_values_in_object(df): +876 return None +877 if 'density' in df.columns: +878 df = df[df['density'] >= 0].reset_index(drop=True) +879 else: +880 return None +881 if self.no_values_in_object(df): +882 return None +883 return df.copy()
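For orientation, a minimal sketch of how these row filters chain together on a hypothetical sample table (the column names follow this module's conventions; the CTD class normally drives this via remove_non_positive_samples):

import pandas as pd

# Hypothetical sample table using the column names this module expects.
df = pd.DataFrame({
    'depth_00':    [-0.2,  1.5,  3.0],
    'pressure_00': [10.1, 10.2, 10.4],
    'salinity_00': [33.1, -1.0, 33.2],
})

util = Utility('CTD_20220101_1200.rsk')           # hypothetical filename
df = util.remove_rows_with_negative_depth(df)     # drops the -0.2 m row
df = util.remove_rows_with_negative_salinity(df)  # drops the negative-salinity row
# Each helper returns None when no rows survive, so guard before chaining further.
if df is not None:
    print(df)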
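The closest-match rule in process_master_sheet reduces to simple arithmetic on the sheet's year, month, and day columns. A rough illustration on a hypothetical two-row master sheet (real sheets are loaded via pd.read_excel):

import pandas as pd

# Hypothetical master-sheet excerpt with the columns the matcher reads.
master = pd.DataFrame({
    'year': [2022, 2022], 'month': [1, 1], 'day': [1, 2],
    'time_local': ['11:55', '12:45'],
    'latitude': [-64.8, -64.9], 'longitude': [-63.5, -63.6],
})

# Date target as parsed from a filename like 'CTD_20220101_1200.rsk' (hypothetical).
year, month, day = 2022, 1, 1
date_diff = ((master['year'] - year).abs()
             + (master['month'] - month).abs()
             + (master['day'] - day).abs())
closest = master[date_diff == date_diff.min()]  # row 0 wins on date
print(closest[['latitude', 'longitude']])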
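Note the behavioural change in this revision: the master sheet is now read once and cached on the CTD class, so repeated location estimates skip the Excel parse. The pattern is the usual memoize-on-a-class-attribute idiom, roughly as in this sketch (_MasterSheetCache is a stand-in; in the module the cache lives on CTD._cached_master_sheet):

import pandas as pd

class _MasterSheetCache:
    _cached_master_sheet = None  # class-level cache shared by all instances

    @classmethod
    def load(cls, path):
        if cls._cached_master_sheet is None:
            cls._cached_master_sheet = pd.read_excel(path)  # hits disk on first call only
        return cls._cached_master_sheet.copy()  # hand callers a copy so the cache stays pristine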
879class Calculate: - 880 """ - 881 Calculate - 882 ---------- - 883 - 884 Class for CTD data calculations. - 885 """ - 886 - 887 @staticmethod - 888 def gsw_infunnel(SA, CT, p): - 889 """ - 890 Check if the given Absolute Salinity (SA), Conservative Temperature (CT), - 891 and pressure (p) are within the "oceanographic funnel" for the TEOS-10 75-term equation. - 892 - 893 Parameters - 894 ---------- - 895 SA : Series - 896 Absolute Salinity in g/kg. - 897 CT : Series - 898 Conservative Temperature in degrees Celsius. - 899 p : Series - 900 Sea pressure in dbar (absolute pressure minus 10.1325 dbar). - 901 - 902 Returns - 903 ------- - 904 Series of bool - 905 A boolean array where True indicates the values are inside the "oceanographic funnel". - 906 """ - 907 # Ensure all inputs are Series and aligned - 908 if not (isinstance(SA, pd.Series) and isinstance(CT, pd.Series) and ( - 909 isinstance(p, pd.Series) or np.isscalar(p))): - 910 raise CTDError("", "SA, CT, and p must be pandas Series or p a scalar") - 911 - 912 if isinstance(p, pd.Series) and (SA.index.equals(CT.index) and SA.index.equals(p.index)) is False: - 913 raise CTDError("", "Indices of SA, CT, and p must be aligned") - 914 - 915 if np.isscalar(p): - 916 p = pd.Series(p, index=SA.index) - 917 - 918 # Define the funnel conditions - 919 CT_freezing_p = gsw.CT_freezing(SA, p, 0) - 920 CT_freezing_500 = gsw.CT_freezing(SA, 500, 0) + 886class Calculate: + 887 """ + 888 Calculate + 889 ---------- + 890 + 891 Class for CTD data calculations. + 892 """ + 893 + 894 @staticmethod + 895 def gsw_infunnel(SA, CT, p): + 896 """ + 897 Check if the given Absolute Salinity (SA), Conservative Temperature (CT), + 898 and pressure (p) are within the "oceanographic funnel" for the TEOS-10 75-term equation. + 899 + 900 Parameters + 901 ---------- + 902 SA : Series + 903 Absolute Salinity in g/kg. + 904 CT : Series + 905 Conservative Temperature in degrees Celsius. + 906 p : Series + 907 Sea pressure in dbar (absolute pressure minus 10.1325 dbar). + 908 + 909 Returns + 910 ------- + 911 Series of bool + 912 A boolean array where True indicates the values are inside the "oceanographic funnel". + 913 """ + 914 # Ensure all inputs are Series and aligned + 915 if not (isinstance(SA, pd.Series) and isinstance(CT, pd.Series) and ( + 916 isinstance(p, pd.Series) or np.isscalar(p))): + 917 raise CTDError("", "SA, CT, and p must be pandas Series or p a scalar") + 918 + 919 if isinstance(p, pd.Series) and (SA.index.equals(CT.index) and SA.index.equals(p.index)) is False: + 920 raise CTDError("", "Indices of SA, CT, and p must be aligned") 921 - 922 in_funnel = pd.Series(True, index=SA.index) # Default all to True - 923 condition = ( - 924 (p > 8000) | - 925 (SA < 0) | (SA > 42) | - 926 ((p < 500) & (CT < CT_freezing_p)) | - 927 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | - 928 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | - 929 ((p >= 500) & (CT < CT_freezing_500)) | - 930 ((p >= 6500) & (SA < 30)) | - 931 ((p >= 6500) & (CT > 10.0)) | - 932 SA.isna() | CT.isna() | p.isna() - 933 ) - 934 in_funnel[condition] = False - 935 - 936 return in_funnel - 937 - 938 @staticmethod - 939 def calculate_and_drop_salinity_spikes(df): - 940 """ - 941 Calculates and removes salinity spikes from the CTD data based on predefined thresholds for acceptable - 942 changes in salinity with depth. 
- 943 - 944 Parameters - 945 ---------- - 946 df : DataFrame - 947 DataFrame containing depth and salinity data - 948 - 949 Returns - 950 ------- - 951 DataFrame - 952 DataFrame after removing salinity spikes - 953 """ - 954 acceptable_delta_salinity_per_depth = [ - 955 (0.0005, 0.001), - 956 (0.005, 0.01), - 957 (0.05, 0.1), - 958 (0.5, 1) - 959 ] - 960 if df.empty: - 961 return None - 962 # Convert 'depth_00' and 'salinity_00' to numeric, coercing errors - 963 df['depth_00'] = pd.to_numeric(df['depth_00'], errors='coerce') - 964 df['salinity_00'] = pd.to_numeric(df['salinity_00'], errors='coerce') - 965 - 966 # Drop any rows where either 'depth_00' or 'salinity_00' is NaN - 967 df = df.dropna(subset=['depth_00', 'salinity_00']) - 968 - 969 # Check if there is enough depth range to calculate - 970 min_depth = df['depth_00'].min() - 971 max_depth = df['depth_00'].max() - 972 if min_depth == max_depth: - 973 print("Insufficient depth range to calculate.") - 974 return df + 922 if np.isscalar(p): + 923 p = pd.Series(p, index=SA.index) + 924 + 925 # Define the funnel conditions + 926 CT_freezing_p = gsw.CT_freezing(SA, p, 0) + 927 CT_freezing_500 = gsw.CT_freezing(SA, 500, 0) + 928 + 929 in_funnel = pd.Series(True, index=SA.index) # Default all to True + 930 condition = ( + 931 (p > 8000) | + 932 (SA < 0) | (SA > 42) | + 933 ((p < 500) & (CT < CT_freezing_p)) | + 934 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | + 935 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | + 936 ((p >= 500) & (CT < CT_freezing_500)) | + 937 ((p >= 6500) & (SA < 30)) | + 938 ((p >= 6500) & (CT > 10.0)) | + 939 SA.isna() | CT.isna() | p.isna() + 940 ) + 941 in_funnel[condition] = False + 942 + 943 return in_funnel + 944 + 945 @staticmethod + 946 def calculate_and_drop_salinity_spikes(df): + 947 """ + 948 Calculates and removes salinity spikes from the CTD data based on predefined thresholds for acceptable + 949 changes in salinity with depth. 
+ 950 + 951 Parameters + 952 ---------- + 953 df : DataFrame + 954 DataFrame containing depth and salinity data + 955 + 956 Returns + 957 ------- + 958 DataFrame + 959 DataFrame after removing salinity spikes + 960 """ + 961 acceptable_delta_salinity_per_depth = [ + 962 (0.0005, 0.001), + 963 (0.005, 0.01), + 964 (0.05, 0.1), + 965 (0.5, 1) + 966 ] + 967 if df.empty: + 968 return None + 969 # Convert 'depth_00' and 'salinity_00' to numeric, coercing errors + 970 df['depth_00'] = pd.to_numeric(df['depth_00'], errors='coerce') + 971 df['salinity_00'] = pd.to_numeric(df['salinity_00'], errors='coerce') + 972 + 973 # Drop any rows where either 'depth_00' or 'salinity_00' is NaN + 974 df = df.dropna(subset=['depth_00', 'salinity_00']) 975 - 976 def recursively_drop(df, depth_range, acceptable_delta, i): - 977 try: - 978 num_points = int((max_depth - min_depth) / depth_range) # Calculate number of points - 979 except: - 980 print("Error in calculating number of points.") - 981 return df - 982 ranges = np.linspace(min_depth, max_depth, num=num_points) - 983 - 984 # Group by these ranges - 985 groups = df.groupby(pd.cut(df['depth_00'], ranges), observed=True) - 986 - 987 # Calculate the min and max salinity for each range and filter ranges where the difference is <= 1 - 988 filtered_groups = groups.filter( - 989 lambda x: abs(x['salinity_00'].max() - x['salinity_00'].min()) <= acceptable_delta) - 990 # Get the indices of the filtered groups - 991 filtered_indices = filtered_groups.index - 992 return filtered_groups + 976 # Check if there is enough depth range to calculate + 977 min_depth = df['depth_00'].min() + 978 max_depth = df['depth_00'].max() + 979 if min_depth == max_depth: + 980 print("Insufficient depth range to calculate.") + 981 return df + 982 + 983 def recursively_drop(df, depth_range, acceptable_delta, i): + 984 try: + 985 num_points = int((max_depth - min_depth) / depth_range) # Calculate number of points + 986 except: + 987 print("Error in calculating number of points.") + 988 return df + 989 ranges = np.linspace(min_depth, max_depth, num=num_points) + 990 + 991 # Group by these ranges + 992 groups = df.groupby(pd.cut(df['depth_00'], ranges), observed=True) 993 - 994 for i, deltas in enumerate(acceptable_delta_salinity_per_depth): - 995 df = recursively_drop(df, deltas[0], deltas[1], i) - 996 return df - 997 - 998 @staticmethod - 999 def calculate_overturns(ctd_array): -1000 """ -1001 Calculates density overturns in the CTD data where denser water lies above lighter water with density -1002 difference of at least 0.05 kg/m³, which may indicate mixing or other dynamic processes. 
-1003
-1004 Parameters
-1005 ----------
-1006 ctd_array : DataFrame
-1007 DataFrame containing depth, density, and timestamp data
-1008
-1009 Returns
-1010 -------
-1011 DataFrame
-1012 DataFrame with identified density overturns
-1013 """
-1014 # Sort DataFrame by depth in ascending order
-1015 ctd_array = ctd_array.sort_values(by='depth_00', ascending=True)
-1016 # Calculate density change and identify overturns
-1017 ctd_array['density_change'] = ctd_array[
-1018 'density'].diff() # Difference in density between consecutive measurements
-1019 ctd_array['overturn'] = ctd_array['density_change'] < -0.05
-1020 ctd_array = ctd_array.sort_values(by='timestamp', ascending=True)
-1021 if 'density_change' in ctd_array.columns:
-1022 ctd_array = ctd_array.drop('density_change', axis=1)
-1023 return ctd_array
-1024
-1025 @staticmethod
-1026 def calculate_absolute_density(ctd_array):
-1027 """
-1028 Calculates absolute density from the CTD data using the TEOS-10 equations,
-1029 ensuring all data points are within the valid oceanographic funnel.
-1030
-1031 Parameters
-1032 ----------
-1033 ctd_array : DataFrame
-1034 DataFrame containing salinity, temperature, and pressure data
-1035
-1036 Returns
-1037 -------
-1038 Series
-1039 Series with calculated absolute density
-1040 """
-1041 SA = ctd_array['salinity_00']
-1042 t = ctd_array['temperature_00']
-1043 p = ctd_array['pressure_00']
-1044 CT = gsw.CT_from_t(SA, t, p)
-1045 if Calculate.gsw_infunnel(SA, CT, p).all():
-1046 return gsw.density.rho_t_exact(SA, t, p)
-1047 else:
-1048 raise CTDError("", "Sample not in funnel, could not calculate density.")
-1049
-1050 @staticmethod
-1051 def calculate_absolute_salinity(ctd_array):
-1052 """
-1053 Calculates absolute salinity from practical salinity, pressure,
-1054 and geographical coordinates using the TEOS-10 salinity conversion formulas.
-1055
-1056 Parameters
-1057 ----------
-1058 ctd_array : DataFrame
-1059 DataFrame containing practical salinity, pressure, longitude, and latitude data
-1060
-1061 Returns
-1062 -------
-1063 Series
-1064 Series with calculated absolute salinity
-1065 """
-1066 SP = ctd_array['salinity_00']
-1067 p = ctd_array['pressure_00']
-1068 lon = ctd_array['longitude']
-1069 lat = ctd_array['latitude']
-1070 return gsw.conversions.SA_from_SP(SP, p, lon, lat)
-1071
-1072 @staticmethod
-1073 def calculate_mld(densities, depths, reference_depth, delta = 0.03):
-1074 """
-1075 Calculates the mixed layer depth (MLD) using the density threshold method.
-1076 MLD is the depth at which the density exceeds the reference density
-1077 by a predefined amount delta, which defaults to (0.03 kg/m³).
+ 994 # Calculate the min and max salinity for each range and keep ranges where the difference is <= acceptable_delta
+ 995 filtered_groups = groups.filter(
+ 996 lambda x: abs(x['salinity_00'].max() - x['salinity_00'].min()) <= acceptable_delta)
+ 997 # Get the indices of the filtered groups
+ 998 filtered_indices = filtered_groups.index
+ 999 return filtered_groups
+1000
+1001 for i, deltas in enumerate(acceptable_delta_salinity_per_depth):
+1002 df = recursively_drop(df, deltas[0], deltas[1], i)
+1003 return df
+1004
+1005 @staticmethod
+1006 def calculate_overturns(ctd_array):
+1007 """
+1008 Calculates density overturns in the CTD data where denser water lies above lighter water with density
+1009 difference of at least 0.05 kg/m³, which may indicate mixing or other dynamic processes.
+1010 +1011 Parameters +1012 ---------- +1013 ctd_array : DataFrame +1014 DataFrame containing depth, density, and timestamp data +1015 +1016 Returns +1017 ------- +1018 DataFrame +1019 DataFrame with identified density overturns +1020 """ +1021 # Sort DataFrame by depth in ascending order +1022 ctd_array = ctd_array.sort_values(by='depth_00', ascending=True) +1023 # Calculate density change and identify overturns +1024 ctd_array['density_change'] = ctd_array[ +1025 'density'].diff() # Difference in density between consecutive measurements +1026 ctd_array['overturn'] = ctd_array['density_change'] < -0.05 +1027 ctd_array = ctd_array.sort_values(by='timestamp', ascending=True) +1028 if 'density_change' in ctd_array.columns: +1029 ctd_array = ctd_array.drop('density_change', axis=1) +1030 return ctd_array +1031 +1032 @staticmethod +1033 def calculate_absolute_density(ctd_array): +1034 """ +1035 Calculates absolute density from the CTD data using the TEOS-10 equations, +1036 ensuring all data points are within the valid oceanographic funnel. +1037 +1038 Parameters +1039 ---------- +1040 ctd_array : DataFrame +1041 DataFrame containing salinity, temperature, and pressure data +1042 +1043 Returns +1044 ------- +1045 Series +1046 Series with calculated absolute density +1047 """ +1048 SA = ctd_array['salinity_00'] +1049 t = ctd_array['temperature_00'] +1050 p = ctd_array['pressure_00'] +1051 CT = gsw.CT_from_t(SA, t, p) +1052 if Calculate.gsw_infunnel(SA, CT, p).all(): +1053 return gsw.density.rho_t_exact(SA, t, p) +1054 else: +1055 raise CTDError("", "Sample not in funnel, could not calculate density.") +1056 +1057 @staticmethod +1058 def calculate_absolute_salinity(ctd_array): +1059 """ +1060 Calculates absolute salinity from practical salinity, pressure, +1061 and geographical coordinates using the TEOS-10 salinity conversion formulas. +1062 +1063 Parameters +1064 ---------- +1065 ctd_array : DataFrame +1066 DataFrame containing practical salinity, pressure, longitude, and latitude data +1067 +1068 Returns +1069 ------- +1070 Series +1071 Series with calculated absolute salinity +1072 """ +1073 SP = ctd_array['salinity_00'] +1074 p = ctd_array['pressure_00'] +1075 lon = ctd_array['longitude'] +1076 lat = ctd_array['latitude'] +1077 return gsw.conversions.SA_from_SP(SP, p, lon, lat) 1078 -1079 Parameters -1080 ---------- -1081 densities : Series -1082 Series of densities -1083 depths : Series -1084 Series of depths corresponding to densities -1085 reference_depth : float -1086 The depth at which to anchor the reference density -1087 delta : float, optional -1088 The difference in density which would indicate the MLD, defaults to 0.03 kg/m. -1089 -1090 Returns -1091 ------- -1092 tuple -1093 A tuple containing the calculated MLD and the reference depth used to calculate MLD. 
-1094 """ -1095 # Convert to numeric and ensure no NaNs remain -1096 densities = densities.apply(pd.to_numeric, errors='coerce') -1097 depths = depths.apply(pd.to_numeric, errors='coerce') -1098 densities = densities.dropna(how='any').reset_index(drop=True) -1099 depths = depths.dropna(how='any').reset_index(drop=True) -1100 reference_depth = int(reference_depth) -1101 if len(depths) == 0 or len(densities) == 0: -1102 return None -1103 sorted_data = sorted(zip(depths, densities), key=lambda x: x[0]) -1104 sorted_depths, sorted_densities = zip(*sorted_data) -1105 # Determine reference density -1106 reference_density = None -1107 for i, depth in enumerate(sorted_depths): -1108 if depth >= reference_depth: -1109 if depth == reference_depth: -1110 reference_density = sorted_densities[i] -1111 reference_depth = sorted_depths[i] -1112 else: -1113 # Linear interpolation -1114 try: -1115 reference_density = sorted_densities[i - 1] + ( -1116 (sorted_densities[i] - sorted_densities[i - 1]) * ( -1117 (reference_depth - sorted_depths[i - 1]) / -1118 (sorted_depths[i] - sorted_depths[i - 1]))) -1119 except: -1120 raise CTDError("", -1121 f"Insufficient depth range to calculate MLD. " -1122 f"Maximum sample depth is "f"{depths.max()}, minimum is {depths.min()}") -1123 break -1124 if reference_density is None: -1125 return None -1126 # Find the depth where density exceeds the reference density by more than 0.05 kg/m³ -1127 for depth, density in zip(sorted_depths, sorted_densities): -1128 if density > reference_density + delta and depth >= reference_depth: -1129 return depth, reference_depth -1130 return None # If no depth meets the criterion -1131 -1132 @staticmethod -1133 def calculate_mld_loess(densities, depths, reference_depth, delta = 0.03): -1134 """ -1135 Calculates the mixed layer depth (MLD) using LOESS smoothing to first smooth the density profile and -1136 then determine the depth where the smoothed density exceeds the reference density -1137 by a predefined amount which defaults to 0.03 kg/m³. +1079 @staticmethod +1080 def calculate_mld(densities, depths, reference_depth, delta = 0.03): +1081 """ +1082 Calculates the mixed layer depth (MLD) using the density threshold method. +1083 MLD is the depth at which the density exceeds the reference density +1084 by a predefined amount delta, which defaults to (0.03 kg/m³). +1085 +1086 Parameters +1087 ---------- +1088 densities : Series +1089 Series of densities +1090 depths : Series +1091 Series of depths corresponding to densities +1092 reference_depth : float +1093 The depth at which to anchor the reference density +1094 delta : float, optional +1095 The difference in density which would indicate the MLD, defaults to 0.03 kg/m³. +1096 +1097 Returns +1098 ------- +1099 tuple +1100 A tuple containing the calculated MLD and the reference depth used to calculate MLD. 
+1101 """
+1102 # Convert to numeric and ensure no NaNs remain
+1103 densities = densities.apply(pd.to_numeric, errors='coerce')
+1104 depths = depths.apply(pd.to_numeric, errors='coerce')
+1105 densities = densities.dropna(how='any').reset_index(drop=True)
+1106 depths = depths.dropna(how='any').reset_index(drop=True)
+1107 reference_depth = int(reference_depth)
+1108 if len(depths) == 0 or len(densities) == 0:
+1109 return None
+1110 sorted_data = sorted(zip(depths, densities), key=lambda x: x[0])
+1111 sorted_depths, sorted_densities = zip(*sorted_data)
+1112 # Determine reference density
+1113 reference_density = None
+1114 for i, depth in enumerate(sorted_depths):
+1115 if depth >= reference_depth:
+1116 if depth == reference_depth:
+1117 reference_density = sorted_densities[i]
+1118 reference_depth = sorted_depths[i]
+1119 else:
+1120 # Linear interpolation
+1121 try:
+1122 reference_density = sorted_densities[i - 1] + (
+1123 (sorted_densities[i] - sorted_densities[i - 1]) * (
+1124 (reference_depth - sorted_depths[i - 1]) /
+1125 (sorted_depths[i] - sorted_depths[i - 1])))
+1126 except:
+1127 raise CTDError("",
+1128 f"Insufficient depth range to calculate MLD. "
+1129 f"Maximum sample depth is "f"{depths.max()}, minimum is {depths.min()}")
+1130 break
+1131 if reference_density is None:
+1132 return None
+1133 # Find the depth where density exceeds the reference density by more than delta
+1134 for depth, density in zip(sorted_depths, sorted_densities):
+1135 if density > reference_density + delta and depth >= reference_depth:
+1136 return depth, reference_depth
+1137 return None # If no depth meets the criterion
1138
-1139 Parameters
-1140 ----------
-1141 densities : Series
-1142 Series of densities
-1143 depths : Series
-1144 Series of depths corresponding to densities
-1145 reference_depth :
-1146 The depth at which to anchor the reference density
-1147 delta : float, optional
-1148 The difference in density which would indicate the MLD, defaults to 0.03 kg/m.
-1149
-1150 Returns
-1151 -------
-1152 tuple
-1153 A tuple containing the calculated MLD and the reference depth used to calculate MLD.
-1154 """
-1155 # Ensure input is pandas Series and drop NA values
-1156 if isinstance(densities, pd.Series) and isinstance(depths, pd.Series):
-1157 densities = densities.dropna().reset_index(drop=True)
-1158 depths = depths.dropna().reset_index(drop=True)
-1159
-1160 # Convert to numeric and ensure no NaNs remain
-1161 densities = densities.apply(pd.to_numeric, errors='coerce')
-1162 depths = depths.apply(pd.to_numeric, errors='coerce')
-1163 densities = densities.dropna().reset_index(drop=True)
-1164 depths = depths.dropna().reset_index(drop=True)
-1165 if densities.empty or depths.empty:
-1166 return None, None
-1167
-1168 # Convert pandas Series to numpy arrays for NumPy operations
-1169 densities = densities.to_numpy()
-1170 depths = depths.to_numpy()
-1171
-1172 # Remove duplicates by averaging densities at the same depth
-1173 unique_depths, indices = np.unique(depths, return_inverse=True)
-1174 average_densities = np.zeros_like(unique_depths)
-1175 np.add.at(average_densities, indices, densities)
-1176 counts = np.zeros_like(unique_depths)
-1177 np.add.at(counts, indices, 1)
-1178 average_densities /= counts
-1179
-1180 # Apply LOESS smoothing
-1181 lowess = statsmodels.api.nonparametric.lowess
-1182 smoothed = lowess(average_densities, unique_depths, frac=0.1)
-1183 smoothed_depths, smoothed_densities = zip(*smoothed)
-1184 reference_density = np.interp(reference_depth, smoothed_depths, smoothed_densities)
-1185
-1186 # Find the depth where density exceeds the reference density by more than 0.05 kg/m³
-1187 exceeding_indices = np.where(np.array(smoothed_densities) > reference_density + delta
-1188 and np.array(smoothed_densities) > reference_depth)[0]
-1189 if exceeding_indices.size > 0:
-1190 mld_depth = smoothed_depths[exceeding_indices[0]]
-1191 return mld_depth, reference_depth
1192
-1193 return None, None # If no depth meets the criterion
-1194
-1195 @staticmethod
-1196 def calculate_mean_surface_density(df, range_):
-1197 """
-1198 Calculates the mean surface density from the CTD data, for a specified range or the entire dataset if the range is larger.
+1139 @staticmethod
+1140 def calculate_mld_loess(densities, depths, reference_depth, delta = 0.03):
+1141 """
+1142 Calculates the mixed layer depth (MLD) using LOESS smoothing to first smooth the density profile and
+1143 then determine the depth where the smoothed density exceeds the reference density
+1144 by a predefined amount which defaults to 0.03 kg/m³.
+1145
+1146 Parameters
+1147 ----------
+1148 densities : Series
+1149 Series of densities
+1150 depths : Series
+1151 Series of depths corresponding to densities
+1152 reference_depth : float
+1153 The depth at which to anchor the reference density
+1154 delta : float, optional
+1155 The difference in density which would indicate the MLD, defaults to 0.03 kg/m³.
+1156
+1157 Returns
+1158 -------
+1159 tuple
+1160 A tuple containing the calculated MLD and the reference depth used to calculate MLD.
+1161 """
+1162 # Ensure input is pandas Series and drop NA values
+1163 if isinstance(densities, pd.Series) and isinstance(depths, pd.Series):
+1164 densities = densities.dropna().reset_index(drop=True)
+1165 depths = depths.dropna().reset_index(drop=True)
+1166
+1167 # Convert to numeric and ensure no NaNs remain
+1168 densities = densities.apply(pd.to_numeric, errors='coerce')
+1169 depths = depths.apply(pd.to_numeric, errors='coerce')
+1170 densities = densities.dropna().reset_index(drop=True)
+1171 depths = depths.dropna().reset_index(drop=True)
+1172 if densities.empty or depths.empty:
+1173 return None, None
+1174
+1175 # Convert pandas Series to numpy arrays for NumPy operations
+1176 densities = densities.to_numpy()
+1177 depths = depths.to_numpy()
+1178
+1179 # Remove duplicates by averaging densities at the same depth
+1180 unique_depths, indices = np.unique(depths, return_inverse=True)
+1181 average_densities = np.zeros_like(unique_depths)
+1182 np.add.at(average_densities, indices, densities)
+1183 counts = np.zeros_like(unique_depths)
+1184 np.add.at(counts, indices, 1)
+1185 average_densities /= counts
+1186
+1187 # Apply LOESS smoothing
+1188 lowess = statsmodels.api.nonparametric.lowess
+1189 smoothed = lowess(average_densities, unique_depths, frac=0.1)
+1190 smoothed_depths, smoothed_densities = zip(*smoothed)
+1191 reference_density = np.interp(reference_depth, smoothed_depths, smoothed_densities)
1192
-1193 return None, None # If no depth meets the criterion
-1194
-1195 @staticmethod
-1196 def calculate_mean_surface_density(df, range_):
-1197 """
-1198 Calculates the mean surface density from the CTD data, for a specified range or the entire dataset if the range is larger.
+1193 # Find the depth where density exceeds the reference density by more than delta
+1194 exceeding_indices = np.where((np.array(smoothed_densities) > reference_density + delta)
+1195 & (np.array(smoothed_depths) >= reference_depth))[0]
+1196 if exceeding_indices.size > 0:
+1197 mld_depth = smoothed_depths[exceeding_indices[0]]
+1198 return mld_depth, reference_depth
1199
-1200 Parameters
-1201 ----------
-1202 df : DataFrame
-1203 DataFrame containing density data.
-1204 range_ : tuple or int
-1205 Tuple indicating the (start, end) indices for the range of rows to be included in the calculation,
-1206 or an integer indicating the number of rows from the start.
-1207
-1208 Returns
-1209 -------
-1210 float, None
-1211 Mean density value of the specified sample or None if unable to calculate.
-1212 """
-1213 min_depth = df.index.min()
-1214 max_depth = df.index.max()
-1215
-1216 if isinstance(range_, tuple):
-1217 start, end = range_
-1218
-1219 # Adjust 'start' to ensure it is within the valid range
-1220 start = max(start, min_depth)
-1221
-1222 # Adjust 'end' to ensure it does not exceed the maximum depth value
-1223 end = min(end, max_depth)
-1224
-1225 # Ensure start is less than end
-1226 if start <= end:
-1227 return df.loc[start:end, 'density'].mean()
-1228 else:
-1229 return None
-1230
-1231 elif isinstance(range_, int):
-1232 # Use 'range_' as the number of rows from the start, adjust if it exceeds the DataFrame length
-1233 range_ = min(range_, len(df))
-1234 return df.iloc[:range_, df.columns.get_loc('density')].mean()
-1235
-1236 else:
-1237 raise ValueError("Invalid range type.
Must be tuple or int.") +1200 return None, None # If no depth meets the criterion +1201 +1202 @staticmethod +1203 def calculate_mean_surface_density(df, range_): +1204 """ +1205 Calculates the mean surface density from the CTD data, for a specified range or the entire dataset if the range is larger. +1206 +1207 Parameters +1208 ---------- +1209 df : DataFrame +1210 DataFrame containing density data. +1211 range_ : tuple or int +1212 Tuple indicating the (start, end) indices for the range of rows to be included in the calculation, +1213 or an integer indicating the number of rows from the start. +1214 +1215 Returns +1216 ------- +1217 float, None +1218 Mean density value of the specified sample or None if unable to calculate. +1219 """ +1220 min_depth = df.index.min() +1221 max_depth = df.index.max() +1222 +1223 if isinstance(range_, tuple): +1224 start, end = range_ +1225 +1226 # Adjust 'start' to ensure it is within the valid range +1227 start = max(start, min_depth) +1228 +1229 # Adjust 'end' to ensure it does not exceed the maximum depth value +1230 end = min(end, max_depth) +1231 +1232 # Ensure start is less than end +1233 if start <= end: +1234 return df.loc[start:end, 'density'].mean() +1235 else: +1236 return None +1237 +1238 elif isinstance(range_, int): +1239 # Use 'range_' as the number of rows from the start, adjust if it exceeds the DataFrame length +1240 range_ = min(range_, len(df)) +1241 return df.iloc[:range_, df.columns.get_loc('density')].mean() +1242 +1243 else: +1244 raise ValueError("Invalid range type. Must be tuple or int.") @@ -4834,56 +4865,56 @@ Returns
886class Calculate: + 887 """ + 888 Calculate + 889 ---------- + 890 + 891 Class for CTD data calculations. + 892 """ + 893 + 894 @staticmethod + 895 def gsw_infunnel(SA, CT, p): + 896 """ + 897 Check if the given Absolute Salinity (SA), Conservative Temperature (CT), + 898 and pressure (p) are within the "oceanographic funnel" for the TEOS-10 75-term equation. + 899 + 900 Parameters + 901 ---------- + 902 SA : Series + 903 Absolute Salinity in g/kg. + 904 CT : Series + 905 Conservative Temperature in degrees Celsius. + 906 p : Series + 907 Sea pressure in dbar (absolute pressure minus 10.1325 dbar). + 908 + 909 Returns + 910 ------- + 911 Series of bool + 912 A boolean array where True indicates the values are inside the "oceanographic funnel". + 913 """ + 914 # Ensure all inputs are Series and aligned + 915 if not (isinstance(SA, pd.Series) and isinstance(CT, pd.Series) and ( + 916 isinstance(p, pd.Series) or np.isscalar(p))): + 917 raise CTDError("", "SA, CT, and p must be pandas Series or p a scalar") + 918 + 919 if isinstance(p, pd.Series) and (SA.index.equals(CT.index) and SA.index.equals(p.index)) is False: + 920 raise CTDError("", "Indices of SA, CT, and p must be aligned") 921 - 922 in_funnel = pd.Series(True, index=SA.index) # Default all to True - 923 condition = ( - 924 (p > 8000) | - 925 (SA < 0) | (SA > 42) | - 926 ((p < 500) & (CT < CT_freezing_p)) | - 927 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | - 928 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | - 929 ((p >= 500) & (CT < CT_freezing_500)) | - 930 ((p >= 6500) & (SA < 30)) | - 931 ((p >= 6500) & (CT > 10.0)) | - 932 SA.isna() | CT.isna() | p.isna() - 933 ) - 934 in_funnel[condition] = False - 935 - 936 return in_funnel - 937 - 938 @staticmethod - 939 def calculate_and_drop_salinity_spikes(df): - 940 """ - 941 Calculates and removes salinity spikes from the CTD data based on predefined thresholds for acceptable - 942 changes in salinity with depth. 
- 943 - 944 Parameters - 945 ---------- - 946 df : DataFrame - 947 DataFrame containing depth and salinity data - 948 - 949 Returns - 950 ------- - 951 DataFrame - 952 DataFrame after removing salinity spikes - 953 """ - 954 acceptable_delta_salinity_per_depth = [ - 955 (0.0005, 0.001), - 956 (0.005, 0.01), - 957 (0.05, 0.1), - 958 (0.5, 1) - 959 ] - 960 if df.empty: - 961 return None - 962 # Convert 'depth_00' and 'salinity_00' to numeric, coercing errors - 963 df['depth_00'] = pd.to_numeric(df['depth_00'], errors='coerce') - 964 df['salinity_00'] = pd.to_numeric(df['salinity_00'], errors='coerce') - 965 - 966 # Drop any rows where either 'depth_00' or 'salinity_00' is NaN - 967 df = df.dropna(subset=['depth_00', 'salinity_00']) - 968 - 969 # Check if there is enough depth range to calculate - 970 min_depth = df['depth_00'].min() - 971 max_depth = df['depth_00'].max() - 972 if min_depth == max_depth: - 973 print("Insufficient depth range to calculate.") - 974 return df + 922 if np.isscalar(p): + 923 p = pd.Series(p, index=SA.index) + 924 + 925 # Define the funnel conditions + 926 CT_freezing_p = gsw.CT_freezing(SA, p, 0) + 927 CT_freezing_500 = gsw.CT_freezing(SA, 500, 0) + 928 + 929 in_funnel = pd.Series(True, index=SA.index) # Default all to True + 930 condition = ( + 931 (p > 8000) | + 932 (SA < 0) | (SA > 42) | + 933 ((p < 500) & (CT < CT_freezing_p)) | + 934 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | + 935 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | + 936 ((p >= 500) & (CT < CT_freezing_500)) | + 937 ((p >= 6500) & (SA < 30)) | + 938 ((p >= 6500) & (CT > 10.0)) | + 939 SA.isna() | CT.isna() | p.isna() + 940 ) + 941 in_funnel[condition] = False + 942 + 943 return in_funnel + 944 + 945 @staticmethod + 946 def calculate_and_drop_salinity_spikes(df): + 947 """ + 948 Calculates and removes salinity spikes from the CTD data based on predefined thresholds for acceptable + 949 changes in salinity with depth. 
+ 950 + 951 Parameters + 952 ---------- + 953 df : DataFrame + 954 DataFrame containing depth and salinity data + 955 + 956 Returns + 957 ------- + 958 DataFrame + 959 DataFrame after removing salinity spikes + 960 """ + 961 acceptable_delta_salinity_per_depth = [ + 962 (0.0005, 0.001), + 963 (0.005, 0.01), + 964 (0.05, 0.1), + 965 (0.5, 1) + 966 ] + 967 if df.empty: + 968 return None + 969 # Convert 'depth_00' and 'salinity_00' to numeric, coercing errors + 970 df['depth_00'] = pd.to_numeric(df['depth_00'], errors='coerce') + 971 df['salinity_00'] = pd.to_numeric(df['salinity_00'], errors='coerce') + 972 + 973 # Drop any rows where either 'depth_00' or 'salinity_00' is NaN + 974 df = df.dropna(subset=['depth_00', 'salinity_00']) 975 - 976 def recursively_drop(df, depth_range, acceptable_delta, i): - 977 try: - 978 num_points = int((max_depth - min_depth) / depth_range) # Calculate number of points - 979 except: - 980 print("Error in calculating number of points.") - 981 return df - 982 ranges = np.linspace(min_depth, max_depth, num=num_points) - 983 - 984 # Group by these ranges - 985 groups = df.groupby(pd.cut(df['depth_00'], ranges), observed=True) - 986 - 987 # Calculate the min and max salinity for each range and filter ranges where the difference is <= 1 - 988 filtered_groups = groups.filter( - 989 lambda x: abs(x['salinity_00'].max() - x['salinity_00'].min()) <= acceptable_delta) - 990 # Get the indices of the filtered groups - 991 filtered_indices = filtered_groups.index - 992 return filtered_groups + 976 # Check if there is enough depth range to calculate + 977 min_depth = df['depth_00'].min() + 978 max_depth = df['depth_00'].max() + 979 if min_depth == max_depth: + 980 print("Insufficient depth range to calculate.") + 981 return df + 982 + 983 def recursively_drop(df, depth_range, acceptable_delta, i): + 984 try: + 985 num_points = int((max_depth - min_depth) / depth_range) # Calculate number of points + 986 except: + 987 print("Error in calculating number of points.") + 988 return df + 989 ranges = np.linspace(min_depth, max_depth, num=num_points) + 990 + 991 # Group by these ranges + 992 groups = df.groupby(pd.cut(df['depth_00'], ranges), observed=True) 993 - 994 for i, deltas in enumerate(acceptable_delta_salinity_per_depth): - 995 df = recursively_drop(df, deltas[0], deltas[1], i) - 996 return df - 997 - 998 @staticmethod - 999 def calculate_overturns(ctd_array): -1000 """ -1001 Calculates density overturns in the CTD data where denser water lies above lighter water with density -1002 difference of at least 0.05 kg/m³, which may indicate mixing or other dynamic processes. 
-1003
-1004 Parameters
-1005 ----------
-1006 ctd_array : DataFrame
-1007 DataFrame containing depth, density, and timestamp data
-1008
-1009 Returns
-1010 -------
-1011 DataFrame
-1012 DataFrame with identified density overturns
-1013 """
-1014 # Sort DataFrame by depth in ascending order
-1015 ctd_array = ctd_array.sort_values(by='depth_00', ascending=True)
-1016 # Calculate density change and identify overturns
-1017 ctd_array['density_change'] = ctd_array[
-1018 'density'].diff() # Difference in density between consecutive measurements
-1019 ctd_array['overturn'] = ctd_array['density_change'] < -0.05
-1020 ctd_array = ctd_array.sort_values(by='timestamp', ascending=True)
-1021 if 'density_change' in ctd_array.columns:
-1022 ctd_array = ctd_array.drop('density_change', axis=1)
-1023 return ctd_array
-1024
-1025 @staticmethod
-1026 def calculate_absolute_density(ctd_array):
-1027 """
-1028 Calculates absolute density from the CTD data using the TEOS-10 equations,
-1029 ensuring all data points are within the valid oceanographic funnel.
-1030
-1031 Parameters
-1032 ----------
-1033 ctd_array : DataFrame
-1034 DataFrame containing salinity, temperature, and pressure data
-1035
-1036 Returns
-1037 -------
-1038 Series
-1039 Series with calculated absolute density
-1040 """
-1041 SA = ctd_array['salinity_00']
-1042 t = ctd_array['temperature_00']
-1043 p = ctd_array['pressure_00']
-1044 CT = gsw.CT_from_t(SA, t, p)
-1045 if Calculate.gsw_infunnel(SA, CT, p).all():
-1046 return gsw.density.rho_t_exact(SA, t, p)
-1047 else:
-1048 raise CTDError("", "Sample not in funnel, could not calculate density.")
-1049
-1050 @staticmethod
-1051 def calculate_absolute_salinity(ctd_array):
-1052 """
-1053 Calculates absolute salinity from practical salinity, pressure,
-1054 and geographical coordinates using the TEOS-10 salinity conversion formulas.
-1055
-1056 Parameters
-1057 ----------
-1058 ctd_array : DataFrame
-1059 DataFrame containing practical salinity, pressure, longitude, and latitude data
-1060
-1061 Returns
-1062 -------
-1063 Series
-1064 Series with calculated absolute salinity
-1065 """
-1066 SP = ctd_array['salinity_00']
-1067 p = ctd_array['pressure_00']
-1068 lon = ctd_array['longitude']
-1069 lat = ctd_array['latitude']
-1070 return gsw.conversions.SA_from_SP(SP, p, lon, lat)
-1071
-1072 @staticmethod
-1073 def calculate_mld(densities, depths, reference_depth, delta = 0.03):
-1074 """
-1075 Calculates the mixed layer depth (MLD) using the density threshold method.
-1076 MLD is the depth at which the density exceeds the reference density
-1077 by a predefined amount delta, which defaults to (0.03 kg/m³).
+ 994 # Calculate the min and max salinity for each range and keep ranges where the difference is <= acceptable_delta
+ 995 filtered_groups = groups.filter(
+ 996 lambda x: abs(x['salinity_00'].max() - x['salinity_00'].min()) <= acceptable_delta)
+ 997 # Get the indices of the filtered groups
+ 998 filtered_indices = filtered_groups.index
+ 999 return filtered_groups
+1000
+1001 for i, deltas in enumerate(acceptable_delta_salinity_per_depth):
+1002 df = recursively_drop(df, deltas[0], deltas[1], i)
+1003 return df
+1004
+1005 @staticmethod
+1006 def calculate_overturns(ctd_array):
+1007 """
+1008 Calculates density overturns in the CTD data where denser water lies above lighter water with density
+1009 difference of at least 0.05 kg/m³, which may indicate mixing or other dynamic processes.
+1010 +1011 Parameters +1012 ---------- +1013 ctd_array : DataFrame +1014 DataFrame containing depth, density, and timestamp data +1015 +1016 Returns +1017 ------- +1018 DataFrame +1019 DataFrame with identified density overturns +1020 """ +1021 # Sort DataFrame by depth in ascending order +1022 ctd_array = ctd_array.sort_values(by='depth_00', ascending=True) +1023 # Calculate density change and identify overturns +1024 ctd_array['density_change'] = ctd_array[ +1025 'density'].diff() # Difference in density between consecutive measurements +1026 ctd_array['overturn'] = ctd_array['density_change'] < -0.05 +1027 ctd_array = ctd_array.sort_values(by='timestamp', ascending=True) +1028 if 'density_change' in ctd_array.columns: +1029 ctd_array = ctd_array.drop('density_change', axis=1) +1030 return ctd_array +1031 +1032 @staticmethod +1033 def calculate_absolute_density(ctd_array): +1034 """ +1035 Calculates absolute density from the CTD data using the TEOS-10 equations, +1036 ensuring all data points are within the valid oceanographic funnel. +1037 +1038 Parameters +1039 ---------- +1040 ctd_array : DataFrame +1041 DataFrame containing salinity, temperature, and pressure data +1042 +1043 Returns +1044 ------- +1045 Series +1046 Series with calculated absolute density +1047 """ +1048 SA = ctd_array['salinity_00'] +1049 t = ctd_array['temperature_00'] +1050 p = ctd_array['pressure_00'] +1051 CT = gsw.CT_from_t(SA, t, p) +1052 if Calculate.gsw_infunnel(SA, CT, p).all(): +1053 return gsw.density.rho_t_exact(SA, t, p) +1054 else: +1055 raise CTDError("", "Sample not in funnel, could not calculate density.") +1056 +1057 @staticmethod +1058 def calculate_absolute_salinity(ctd_array): +1059 """ +1060 Calculates absolute salinity from practical salinity, pressure, +1061 and geographical coordinates using the TEOS-10 salinity conversion formulas. +1062 +1063 Parameters +1064 ---------- +1065 ctd_array : DataFrame +1066 DataFrame containing practical salinity, pressure, longitude, and latitude data +1067 +1068 Returns +1069 ------- +1070 Series +1071 Series with calculated absolute salinity +1072 """ +1073 SP = ctd_array['salinity_00'] +1074 p = ctd_array['pressure_00'] +1075 lon = ctd_array['longitude'] +1076 lat = ctd_array['latitude'] +1077 return gsw.conversions.SA_from_SP(SP, p, lon, lat) 1078 -1079 Parameters -1080 ---------- -1081 densities : Series -1082 Series of densities -1083 depths : Series -1084 Series of depths corresponding to densities -1085 reference_depth : float -1086 The depth at which to anchor the reference density -1087 delta : float, optional -1088 The difference in density which would indicate the MLD, defaults to 0.03 kg/m. -1089 -1090 Returns -1091 ------- -1092 tuple -1093 A tuple containing the calculated MLD and the reference depth used to calculate MLD. 
-1094 """ -1095 # Convert to numeric and ensure no NaNs remain -1096 densities = densities.apply(pd.to_numeric, errors='coerce') -1097 depths = depths.apply(pd.to_numeric, errors='coerce') -1098 densities = densities.dropna(how='any').reset_index(drop=True) -1099 depths = depths.dropna(how='any').reset_index(drop=True) -1100 reference_depth = int(reference_depth) -1101 if len(depths) == 0 or len(densities) == 0: -1102 return None -1103 sorted_data = sorted(zip(depths, densities), key=lambda x: x[0]) -1104 sorted_depths, sorted_densities = zip(*sorted_data) -1105 # Determine reference density -1106 reference_density = None -1107 for i, depth in enumerate(sorted_depths): -1108 if depth >= reference_depth: -1109 if depth == reference_depth: -1110 reference_density = sorted_densities[i] -1111 reference_depth = sorted_depths[i] -1112 else: -1113 # Linear interpolation -1114 try: -1115 reference_density = sorted_densities[i - 1] + ( -1116 (sorted_densities[i] - sorted_densities[i - 1]) * ( -1117 (reference_depth - sorted_depths[i - 1]) / -1118 (sorted_depths[i] - sorted_depths[i - 1]))) -1119 except: -1120 raise CTDError("", -1121 f"Insufficient depth range to calculate MLD. " -1122 f"Maximum sample depth is "f"{depths.max()}, minimum is {depths.min()}") -1123 break -1124 if reference_density is None: -1125 return None -1126 # Find the depth where density exceeds the reference density by more than 0.05 kg/m³ -1127 for depth, density in zip(sorted_depths, sorted_densities): -1128 if density > reference_density + delta and depth >= reference_depth: -1129 return depth, reference_depth -1130 return None # If no depth meets the criterion -1131 -1132 @staticmethod -1133 def calculate_mld_loess(densities, depths, reference_depth, delta = 0.03): -1134 """ -1135 Calculates the mixed layer depth (MLD) using LOESS smoothing to first smooth the density profile and -1136 then determine the depth where the smoothed density exceeds the reference density -1137 by a predefined amount which defaults to 0.03 kg/m³. +1079 @staticmethod +1080 def calculate_mld(densities, depths, reference_depth, delta = 0.03): +1081 """ +1082 Calculates the mixed layer depth (MLD) using the density threshold method. +1083 MLD is the depth at which the density exceeds the reference density +1084 by a predefined amount delta, which defaults to (0.03 kg/m³). +1085 +1086 Parameters +1087 ---------- +1088 densities : Series +1089 Series of densities +1090 depths : Series +1091 Series of depths corresponding to densities +1092 reference_depth : float +1093 The depth at which to anchor the reference density +1094 delta : float, optional +1095 The difference in density which would indicate the MLD, defaults to 0.03 kg/m³. +1096 +1097 Returns +1098 ------- +1099 tuple +1100 A tuple containing the calculated MLD and the reference depth used to calculate MLD. 
+1101 """
+1102 # Convert to numeric and ensure no NaNs remain
+1103 densities = densities.apply(pd.to_numeric, errors='coerce')
+1104 depths = depths.apply(pd.to_numeric, errors='coerce')
+1105 densities = densities.dropna(how='any').reset_index(drop=True)
+1106 depths = depths.dropna(how='any').reset_index(drop=True)
+1107 reference_depth = int(reference_depth)
+1108 if len(depths) == 0 or len(densities) == 0:
+1109 return None
+1110 sorted_data = sorted(zip(depths, densities), key=lambda x: x[0])
+1111 sorted_depths, sorted_densities = zip(*sorted_data)
+1112 # Determine reference density
+1113 reference_density = None
+1114 for i, depth in enumerate(sorted_depths):
+1115 if depth >= reference_depth:
+1116 if depth == reference_depth:
+1117 reference_density = sorted_densities[i]
+1118 reference_depth = sorted_depths[i]
+1119 else:
+1120 # Linear interpolation
+1121 try:
+1122 reference_density = sorted_densities[i - 1] + (
+1123 (sorted_densities[i] - sorted_densities[i - 1]) * (
+1124 (reference_depth - sorted_depths[i - 1]) /
+1125 (sorted_depths[i] - sorted_depths[i - 1])))
+1126 except:
+1127 raise CTDError("",
+1128 f"Insufficient depth range to calculate MLD. "
+1129 f"Maximum sample depth is "f"{depths.max()}, minimum is {depths.min()}")
+1130 break
+1131 if reference_density is None:
+1132 return None
+1133 # Find the depth where density exceeds the reference density by more than delta
+1134 for depth, density in zip(sorted_depths, sorted_densities):
+1135 if density > reference_density + delta and depth >= reference_depth:
+1136 return depth, reference_depth
+1137 return None # If no depth meets the criterion
1138
-1139 Parameters
-1140 ----------
-1141 densities : Series
-1142 Series of densities
-1143 depths : Series
-1144 Series of depths corresponding to densities
-1145 reference_depth :
-1146 The depth at which to anchor the reference density
-1147 delta : float, optional
-1148 The difference in density which would indicate the MLD, defaults to 0.03 kg/m.
-1149
-1150 Returns
-1151 -------
-1152 tuple
-1153 A tuple containing the calculated MLD and the reference depth used to calculate MLD.
-1154 """
-1155 # Ensure input is pandas Series and drop NA values
-1156 if isinstance(densities, pd.Series) and isinstance(depths, pd.Series):
-1157 densities = densities.dropna().reset_index(drop=True)
-1158 depths = depths.dropna().reset_index(drop=True)
-1159
-1160 # Convert to numeric and ensure no NaNs remain
-1161 densities = densities.apply(pd.to_numeric, errors='coerce')
-1162 depths = depths.apply(pd.to_numeric, errors='coerce')
-1163 densities = densities.dropna().reset_index(drop=True)
-1164 depths = depths.dropna().reset_index(drop=True)
-1165 if densities.empty or depths.empty:
-1166 return None, None
-1167
-1168 # Convert pandas Series to numpy arrays for NumPy operations
-1169 densities = densities.to_numpy()
-1170 depths = depths.to_numpy()
-1171
-1172 # Remove duplicates by averaging densities at the same depth
-1173 unique_depths, indices = np.unique(depths, return_inverse=True)
-1174 average_densities = np.zeros_like(unique_depths)
-1175 np.add.at(average_densities, indices, densities)
-1176 counts = np.zeros_like(unique_depths)
-1177 np.add.at(counts, indices, 1)
-1178 average_densities /= counts
-1179
-1180 # Apply LOESS smoothing
-1181 lowess = statsmodels.api.nonparametric.lowess
-1182 smoothed = lowess(average_densities, unique_depths, frac=0.1)
-1183 smoothed_depths, smoothed_densities = zip(*smoothed)
-1184 reference_density = np.interp(reference_depth, smoothed_depths, smoothed_densities)
-1185
-1186 # Find the depth where density exceeds the reference density by more than 0.05 kg/m³
-1187 exceeding_indices = np.where(np.array(smoothed_densities) > reference_density + delta
-1188 and np.array(smoothed_densities) > reference_depth)[0]
-1189 if exceeding_indices.size > 0:
-1190 mld_depth = smoothed_depths[exceeding_indices[0]]
-1191 return mld_depth, reference_depth
1192
-1193 return None, None # If no depth meets the criterion
-1194
-1195 @staticmethod
-1196 def calculate_mean_surface_density(df, range_):
-1197 """
-1198 Calculates the mean surface density from the CTD data, for a specified range or the entire dataset if the range is larger.
+1139 @staticmethod
+1140 def calculate_mld_loess(densities, depths, reference_depth, delta = 0.03):
+1141 """
+1142 Calculates the mixed layer depth (MLD) using LOESS smoothing to first smooth the density profile and
+1143 then determine the depth where the smoothed density exceeds the reference density
+1144 by a predefined amount which defaults to 0.03 kg/m³.
+1145
+1146 Parameters
+1147 ----------
+1148 densities : Series
+1149 Series of densities
+1150 depths : Series
+1151 Series of depths corresponding to densities
+1152 reference_depth : float
+1153 The depth at which to anchor the reference density
+1154 delta : float, optional
+1155 The difference in density which would indicate the MLD, defaults to 0.03 kg/m³.
+1156
+1157 Returns
+1158 -------
+1159 tuple
+1160 A tuple containing the calculated MLD and the reference depth used to calculate MLD.
+1161 """
+1162 # Ensure input is pandas Series and drop NA values
+1163 if isinstance(densities, pd.Series) and isinstance(depths, pd.Series):
+1164 densities = densities.dropna().reset_index(drop=True)
+1165 depths = depths.dropna().reset_index(drop=True)
+1166
+1167 # Convert to numeric and ensure no NaNs remain
+1168 densities = densities.apply(pd.to_numeric, errors='coerce')
+1169 depths = depths.apply(pd.to_numeric, errors='coerce')
+1170 densities = densities.dropna().reset_index(drop=True)
+1171 depths = depths.dropna().reset_index(drop=True)
+1172 if densities.empty or depths.empty:
+1173 return None, None
+1174
+1175 # Convert pandas Series to numpy arrays for NumPy operations
+1176 densities = densities.to_numpy()
+1177 depths = depths.to_numpy()
+1178
+1179 # Remove duplicates by averaging densities at the same depth
+1180 unique_depths, indices = np.unique(depths, return_inverse=True)
+1181 average_densities = np.zeros_like(unique_depths)
+1182 np.add.at(average_densities, indices, densities)
+1183 counts = np.zeros_like(unique_depths)
+1184 np.add.at(counts, indices, 1)
+1185 average_densities /= counts
+1186
+1187 # Apply LOESS smoothing
+1188 lowess = statsmodels.api.nonparametric.lowess
+1189 smoothed = lowess(average_densities, unique_depths, frac=0.1)
+1190 smoothed_depths, smoothed_densities = zip(*smoothed)
+1191 reference_density = np.interp(reference_depth, smoothed_depths, smoothed_densities)
1192
-1193 return None, None # If no depth meets the criterion
-1194
-1195 @staticmethod
-1196 def calculate_mean_surface_density(df, range_):
-1197 """
-1198 Calculates the mean surface density from the CTD data, for a specified range or the entire dataset if the range is larger.
+1193 # Find the depth where density exceeds the reference density by more than delta
+1194 exceeding_indices = np.where((np.array(smoothed_densities) > reference_density + delta)
+1195 & (np.array(smoothed_depths) >= reference_depth))[0]
+1196 if exceeding_indices.size > 0:
+1197 mld_depth = smoothed_depths[exceeding_indices[0]]
+1198 return mld_depth, reference_depth
1199
-1200 Parameters
-1201 ----------
-1202 df : DataFrame
-1203 DataFrame containing density data.
-1204 range_ : tuple or int
-1205 Tuple indicating the (start, end) indices for the range of rows to be included in the calculation,
-1206 or an integer indicating the number of rows from the start.
-1207
-1208 Returns
-1209 -------
-1210 float, None
-1211 Mean density value of the specified sample or None if unable to calculate.
-1212 """
-1213 min_depth = df.index.min()
-1214 max_depth = df.index.max()
-1215
-1216 if isinstance(range_, tuple):
-1217 start, end = range_
-1218
-1219 # Adjust 'start' to ensure it is within the valid range
-1220 start = max(start, min_depth)
-1221
-1222 # Adjust 'end' to ensure it does not exceed the maximum depth value
-1223 end = min(end, max_depth)
-1224
-1225 # Ensure start is less than end
-1226 if start <= end:
-1227 return df.loc[start:end, 'density'].mean()
-1228 else:
-1229 return None
-1230
-1231 elif isinstance(range_, int):
-1232 # Use 'range_' as the number of rows from the start, adjust if it exceeds the DataFrame length
-1233 range_ = min(range_, len(df))
-1234 return df.iloc[:range_, df.columns.get_loc('density')].mean()
-1235
-1236 else:
-1237 raise ValueError("Invalid range type.
Must be tuple or int.") +1200 return None, None # If no depth meets the criterion +1201 +1202 @staticmethod +1203 def calculate_mean_surface_density(df, range_): +1204 """ +1205 Calculates the mean surface density from the CTD data, for a specified range or the entire dataset if the range is larger. +1206 +1207 Parameters +1208 ---------- +1209 df : DataFrame +1210 DataFrame containing density data. +1211 range_ : tuple or int +1212 Tuple indicating the (start, end) indices for the range of rows to be included in the calculation, +1213 or an integer indicating the number of rows from the start. +1214 +1215 Returns +1216 ------- +1217 float, None +1218 Mean density value of the specified sample or None if unable to calculate. +1219 """ +1220 min_depth = df.index.min() +1221 max_depth = df.index.max() +1222 +1223 if isinstance(range_, tuple): +1224 start, end = range_ +1225 +1226 # Adjust 'start' to ensure it is within the valid range +1227 start = max(start, min_depth) +1228 +1229 # Adjust 'end' to ensure it does not exceed the maximum depth value +1230 end = min(end, max_depth) +1231 +1232 # Ensure start is less than end +1233 if start <= end: +1234 return df.loc[start:end, 'density'].mean() +1235 else: +1236 return None +1237 +1238 elif isinstance(range_, int): +1239 # Use 'range_' as the number of rows from the start, adjust if it exceeds the DataFrame length +1240 range_ = min(range_, len(df)) +1241 return df.iloc[:range_, df.columns.get_loc('density')].mean() +1242 +1243 else: +1244 raise ValueError("Invalid range type. Must be tuple or int.")
887 @staticmethod -888 def gsw_infunnel(SA, CT, p): -889 """ -890 Check if the given Absolute Salinity (SA), Conservative Temperature (CT), -891 and pressure (p) are within the "oceanographic funnel" for the TEOS-10 75-term equation. -892 -893 Parameters -894 ---------- -895 SA : Series -896 Absolute Salinity in g/kg. -897 CT : Series -898 Conservative Temperature in degrees Celsius. -899 p : Series -900 Sea pressure in dbar (absolute pressure minus 10.1325 dbar). -901 -902 Returns -903 ------- -904 Series of bool -905 A boolean array where True indicates the values are inside the "oceanographic funnel". -906 """ -907 # Ensure all inputs are Series and aligned -908 if not (isinstance(SA, pd.Series) and isinstance(CT, pd.Series) and ( -909 isinstance(p, pd.Series) or np.isscalar(p))): -910 raise CTDError("", "SA, CT, and p must be pandas Series or p a scalar") -911 -912 if isinstance(p, pd.Series) and (SA.index.equals(CT.index) and SA.index.equals(p.index)) is False: -913 raise CTDError("", "Indices of SA, CT, and p must be aligned") -914 -915 if np.isscalar(p): -916 p = pd.Series(p, index=SA.index) -917 -918 # Define the funnel conditions -919 CT_freezing_p = gsw.CT_freezing(SA, p, 0) -920 CT_freezing_500 = gsw.CT_freezing(SA, 500, 0) + 894 @staticmethod +895 def gsw_infunnel(SA, CT, p): +896 """ +897 Check if the given Absolute Salinity (SA), Conservative Temperature (CT), +898 and pressure (p) are within the "oceanographic funnel" for the TEOS-10 75-term equation. +899 +900 Parameters +901 ---------- +902 SA : Series +903 Absolute Salinity in g/kg. +904 CT : Series +905 Conservative Temperature in degrees Celsius. +906 p : Series +907 Sea pressure in dbar (absolute pressure minus 10.1325 dbar). +908 +909 Returns +910 ------- +911 Series of bool +912 A boolean array where True indicates the values are inside the "oceanographic funnel". 
+913 """ +914 # Ensure all inputs are Series and aligned +915 if not (isinstance(SA, pd.Series) and isinstance(CT, pd.Series) and ( +916 isinstance(p, pd.Series) or np.isscalar(p))): +917 raise CTDError("", "SA, CT, and p must be pandas Series or p a scalar") +918 +919 if isinstance(p, pd.Series) and (SA.index.equals(CT.index) and SA.index.equals(p.index)) is False: +920 raise CTDError("", "Indices of SA, CT, and p must be aligned") 921 -922 in_funnel = pd.Series(True, index=SA.index) # Default all to True -923 condition = ( -924 (p > 8000) | -925 (SA < 0) | (SA > 42) | -926 ((p < 500) & (CT < CT_freezing_p)) | -927 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | -928 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | -929 ((p >= 500) & (CT < CT_freezing_500)) | -930 ((p >= 6500) & (SA < 30)) | -931 ((p >= 6500) & (CT > 10.0)) | -932 SA.isna() | CT.isna() | p.isna() -933 ) -934 in_funnel[condition] = False -935 -936 return in_funnel +922 if np.isscalar(p): +923 p = pd.Series(p, index=SA.index) +924 +925 # Define the funnel conditions +926 CT_freezing_p = gsw.CT_freezing(SA, p, 0) +927 CT_freezing_500 = gsw.CT_freezing(SA, 500, 0) +928 +929 in_funnel = pd.Series(True, index=SA.index) # Default all to True +930 condition = ( +931 (p > 8000) | +932 (SA < 0) | (SA > 42) | +933 ((p < 500) & (CT < CT_freezing_p)) | +934 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | +935 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | +936 ((p >= 500) & (CT < CT_freezing_500)) | +937 ((p >= 6500) & (SA < 30)) | +938 ((p >= 6500) & (CT > 10.0)) | +939 SA.isna() | CT.isna() | p.isna() +940 ) +941 in_funnel[condition] = False +942 +943 return in_funnel @@ -4922,65 +4953,65 @@ Returns
894 @staticmethod +895 def gsw_infunnel(SA, CT, p): +896 """ +897 Check if the given Absolute Salinity (SA), Conservative Temperature (CT), +898 and pressure (p) are within the "oceanographic funnel" for the TEOS-10 75-term equation. +899 +900 Parameters +901 ---------- +902 SA : Series +903 Absolute Salinity in g/kg. +904 CT : Series +905 Conservative Temperature in degrees Celsius. +906 p : Series +907 Sea pressure in dbar (absolute pressure minus 10.1325 dbar). +908 +909 Returns +910 ------- +911 Series of bool +912 A boolean array where True indicates the values are inside the "oceanographic funnel". +913 """ +914 # Ensure all inputs are Series and aligned +915 if not (isinstance(SA, pd.Series) and isinstance(CT, pd.Series) and ( +916 isinstance(p, pd.Series) or np.isscalar(p))): +917 raise CTDError("", "SA, CT, and p must be pandas Series or p a scalar") +918 +919 if isinstance(p, pd.Series) and (SA.index.equals(CT.index) and SA.index.equals(p.index)) is False: +920 raise CTDError("", "Indices of SA, CT, and p must be aligned") 921 -922 in_funnel = pd.Series(True, index=SA.index) # Default all to True -923 condition = ( -924 (p > 8000) | -925 (SA < 0) | (SA > 42) | -926 ((p < 500) & (CT < CT_freezing_p)) | -927 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | -928 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | -929 ((p >= 500) & (CT < CT_freezing_500)) | -930 ((p >= 6500) & (SA < 30)) | -931 ((p >= 6500) & (CT > 10.0)) | -932 SA.isna() | CT.isna() | p.isna() -933 ) -934 in_funnel[condition] = False -935 -936 return in_funnel +922 if np.isscalar(p): +923 p = pd.Series(p, index=SA.index) +924 +925 # Define the funnel conditions +926 CT_freezing_p = gsw.CT_freezing(SA, p, 0) +927 CT_freezing_500 = gsw.CT_freezing(SA, 500, 0) +928 +929 in_funnel = pd.Series(True, index=SA.index) # Default all to True +930 condition = ( +931 (p > 8000) | +932 (SA < 0) | (SA > 42) | +933 ((p < 500) & (CT < CT_freezing_p)) | +934 ((p >= 500) & (p < 6500) & (SA < p * 5e-3 - 2.5)) | +935 ((p >= 500) & (p < 6500) & (CT > (31.66666666666667 - p * 3.333333333333334e-3))) | +936 ((p >= 500) & (CT < CT_freezing_500)) | +937 ((p >= 6500) & (SA < 30)) | +938 ((p >= 6500) & (CT > 10.0)) | +939 SA.isna() | CT.isna() | p.isna() +940 ) +941 in_funnel[condition] = False +942 +943 return in_funnel
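A short illustrative call to gsw_infunnel (hypothetical values, not from the source). Rows whose SA, CT, or p fall outside the 75-term funnel, or which contain NaNs, come back False; a scalar p is broadcast over the index of SA.

import pandas as pd
from CTDFjorder.CTDFjorder import Calculate  # assumed import path

SA = pd.Series([35.0, 34.5, 41.0])    # Absolute Salinity, g/kg
CT = pd.Series([10.0, 2.5, -1.0])     # Conservative Temperature, °C
p = pd.Series([10.0, 500.0, 1200.0])  # sea pressure, dbar

mask = Calculate.gsw_infunnel(SA, CT, p)  # boolean Series aligned with SA's index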
938 @staticmethod -939 def calculate_and_drop_salinity_spikes(df): -940 """ -941 Calculates and removes salinity spikes from the CTD data based on predefined thresholds for acceptable -942 changes in salinity with depth. -943 -944 Parameters -945 ---------- -946 df : DataFrame -947 DataFrame containing depth and salinity data -948 -949 Returns -950 ------- -951 DataFrame -952 DataFrame after removing salinity spikes -953 """ -954 acceptable_delta_salinity_per_depth = [ -955 (0.0005, 0.001), -956 (0.005, 0.01), -957 (0.05, 0.1), -958 (0.5, 1) -959 ] -960 if df.empty: -961 return None -962 # Convert 'depth_00' and 'salinity_00' to numeric, coercing errors -963 df['depth_00'] = pd.to_numeric(df['depth_00'], errors='coerce') -964 df['salinity_00'] = pd.to_numeric(df['salinity_00'], errors='coerce') -965 -966 # Drop any rows where either 'depth_00' or 'salinity_00' is NaN -967 df = df.dropna(subset=['depth_00', 'salinity_00']) -968 -969 # Check if there is enough depth range to calculate -970 min_depth = df['depth_00'].min() -971 max_depth = df['depth_00'].max() -972 if min_depth == max_depth: -973 print("Insufficient depth range to calculate.") -974 return df -975 -976 def recursively_drop(df, depth_range, acceptable_delta, i): -977 try: -978 num_points = int((max_depth - min_depth) / depth_range) # Calculate number of points -979 except: -980 print("Error in calculating number of points.") -981 return df -982 ranges = np.linspace(min_depth, max_depth, num=num_points) -983 -984 # Group by these ranges -985 groups = df.groupby(pd.cut(df['depth_00'], ranges), observed=True) -986 -987 # Calculate the min and max salinity for each range and filter ranges where the difference is <= 1 -988 filtered_groups = groups.filter( -989 lambda x: abs(x['salinity_00'].max() - x['salinity_00'].min()) <= acceptable_delta) -990 # Get the indices of the filtered groups -991 filtered_indices = filtered_groups.index -992 return filtered_groups -993 -994 for i, deltas in enumerate(acceptable_delta_salinity_per_depth): -995 df = recursively_drop(df, deltas[0], deltas[1], i) -996 return df + 945 @staticmethod + 946 def calculate_and_drop_salinity_spikes(df): + 947 """ + 948 Calculates and removes salinity spikes from the CTD data based on predefined thresholds for acceptable + 949 changes in salinity with depth. 
+ 950 + 951 Parameters + 952 ---------- + 953 df : DataFrame + 954 DataFrame containing depth and salinity data + 955 + 956 Returns + 957 ------- + 958 DataFrame + 959 DataFrame after removing salinity spikes + 960 """ + 961 acceptable_delta_salinity_per_depth = [ + 962 (0.0005, 0.001), + 963 (0.005, 0.01), + 964 (0.05, 0.1), + 965 (0.5, 1) + 966 ] + 967 if df.empty: + 968 return None + 969 # Convert 'depth_00' and 'salinity_00' to numeric, coercing errors + 970 df['depth_00'] = pd.to_numeric(df['depth_00'], errors='coerce') + 971 df['salinity_00'] = pd.to_numeric(df['salinity_00'], errors='coerce') + 972 + 973 # Drop any rows where either 'depth_00' or 'salinity_00' is NaN + 974 df = df.dropna(subset=['depth_00', 'salinity_00']) + 975 + 976 # Check if there is enough depth range to calculate + 977 min_depth = df['depth_00'].min() + 978 max_depth = df['depth_00'].max() + 979 if min_depth == max_depth: + 980 print("Insufficient depth range to calculate.") + 981 return df + 982 + 983 def recursively_drop(df, depth_range, acceptable_delta, i): + 984 try: + 985 num_points = int((max_depth - min_depth) / depth_range) # Calculate number of points + 986 except: + 987 print("Error in calculating number of points.") + 988 return df + 989 ranges = np.linspace(min_depth, max_depth, num=num_points) + 990 + 991 # Group by these ranges + 992 groups = df.groupby(pd.cut(df['depth_00'], ranges), observed=True) + 993 + 994 # Calculate the min and max salinity for each range and filter ranges where the difference is <= 1 + 995 filtered_groups = groups.filter( + 996 lambda x: abs(x['salinity_00'].max() - x['salinity_00'].min()) <= acceptable_delta) + 997 # Get the indices of the filtered groups + 998 filtered_indices = filtered_groups.index + 999 return filtered_groups +1000 +1001 for i, deltas in enumerate(acceptable_delta_salinity_per_depth): +1002 df = recursively_drop(df, deltas[0], deltas[1], i) +1003 return df @@ -5015,32 +5046,32 @@ Returns
945 @staticmethod + 946 def calculate_and_drop_salinity_spikes(df): + 947 """ + 948 Calculates and removes salinity spikes from the CTD data based on predefined thresholds for acceptable + 949 changes in salinity with depth. + 950 + 951 Parameters + 952 ---------- + 953 df : DataFrame + 954 DataFrame containing depth and salinity data + 955 + 956 Returns + 957 ------- + 958 DataFrame + 959 DataFrame after removing salinity spikes + 960 """ + 961 acceptable_delta_salinity_per_depth = [ + 962 (0.0005, 0.001), + 963 (0.005, 0.01), + 964 (0.05, 0.1), + 965 (0.5, 1) + 966 ] + 967 if df.empty: + 968 return None + 969 # Convert 'depth_00' and 'salinity_00' to numeric, coercing errors + 970 df['depth_00'] = pd.to_numeric(df['depth_00'], errors='coerce') + 971 df['salinity_00'] = pd.to_numeric(df['salinity_00'], errors='coerce') + 972 + 973 # Drop any rows where either 'depth_00' or 'salinity_00' is NaN + 974 df = df.dropna(subset=['depth_00', 'salinity_00']) + 975 + 976 # Check if there is enough depth range to calculate + 977 min_depth = df['depth_00'].min() + 978 max_depth = df['depth_00'].max() + 979 if min_depth == max_depth: + 980 print("Insufficient depth range to calculate.") + 981 return df + 982 + 983 def recursively_drop(df, depth_range, acceptable_delta, i): + 984 try: + 985 num_points = int((max_depth - min_depth) / depth_range) # Calculate number of points + 986 except: + 987 print("Error in calculating number of points.") + 988 return df + 989 ranges = np.linspace(min_depth, max_depth, num=num_points) + 990 + 991 # Group by these ranges + 992 groups = df.groupby(pd.cut(df['depth_00'], ranges), observed=True) + 993 + 994 # Calculate the min and max salinity for each range and filter ranges where the difference is <= 1 + 995 filtered_groups = groups.filter( + 996 lambda x: abs(x['salinity_00'].max() - x['salinity_00'].min()) <= acceptable_delta) + 997 # Get the indices of the filtered groups + 998 filtered_indices = filtered_groups.index + 999 return filtered_groups +1000 +1001 for i, deltas in enumerate(acceptable_delta_salinity_per_depth): +1002 df = recursively_drop(df, deltas[0], deltas[1], i) +1003 return df
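An illustrative before/after run of the spike filter on synthetic data (not from the source). Passing a copy is deliberate: the function coerces the depth and salinity columns in place before filtering.

import numpy as np
import pandas as pd
from CTDFjorder.CTDFjorder import Calculate  # assumed import path

rng = np.random.default_rng(0)
raw = pd.DataFrame({
    'depth_00': np.linspace(0.5, 40.0, 200),
    'salinity_00': 33.0 + rng.normal(0.0, 0.002, 200),
})
raw.loc[100, 'salinity_00'] += 5.0  # inject an obvious spike

despiked = Calculate.calculate_and_drop_salinity_spikes(raw.copy())
print(len(raw), len(despiked))  # rows in depth bins whose salinity spread exceeds the threshold are dropped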
998 @staticmethod - 999 def calculate_overturns(ctd_array): -1000 """ -1001 Calculates density overturns in the CTD data where denser water lies above lighter water with density -1002 difference of at least 0.05 kg/m³, which may indicate mixing or other dynamic processes. -1003 -1004 Parameters -1005 ---------- -1006 ctd_array : DataFrame -1007 DataFrame containing depth, density, and timestamp data -1008 -1009 Returns -1010 ------- -1011 DataFrame -1012 DataFrame with identified density overturns -1013 """ -1014 # Sort DataFrame by depth in ascending order -1015 ctd_array = ctd_array.sort_values(by='depth_00', ascending=True) -1016 # Calculate density change and identify overturns -1017 ctd_array['density_change'] = ctd_array[ -1018 'density'].diff() # Difference in density between consecutive measurements -1019 ctd_array['overturn'] = ctd_array['density_change'] < -0.05 -1020 ctd_array = ctd_array.sort_values(by='timestamp', ascending=True) -1021 if 'density_change' in ctd_array.columns: -1022 ctd_array = ctd_array.drop('density_change', axis=1) -1023 return ctd_array + 1005 @staticmethod +1006 def calculate_overturns(ctd_array): +1007 """ +1008 Calculates density overturns in the CTD data where denser water lies above lighter water with density +1009 difference of at least 0.05 kg/m³, which may indicate mixing or other dynamic processes. +1010 +1011 Parameters +1012 ---------- +1013 ctd_array : DataFrame +1014 DataFrame containing depth, density, and timestamp data +1015 +1016 Returns +1017 ------- +1018 DataFrame +1019 DataFrame with identified density overturns +1020 """ +1021 # Sort DataFrame by depth in ascending order +1022 ctd_array = ctd_array.sort_values(by='depth_00', ascending=True) +1023 # Calculate density change and identify overturns +1024 ctd_array['density_change'] = ctd_array[ +1025 'density'].diff() # Difference in density between consecutive measurements +1026 ctd_array['overturn'] = ctd_array['density_change'] < -0.05 +1027 ctd_array = ctd_array.sort_values(by='timestamp', ascending=True) +1028 if 'density_change' in ctd_array.columns: +1029 ctd_array = ctd_array.drop('density_change', axis=1) +1030 return ctd_array @@ -5075,30 +5106,30 @@ Returns
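A small illustration of the overturn flag, assuming Calculate is importable from CTDFjorder.CTDFjorder (import path illustrative): the parcel at 3 m is lighter than the water above it by more than 0.05 kg/m³, so its row is flagged.

import pandas as pd
from CTDFjorder.CTDFjorder import Calculate  # illustrative import path

profile = pd.DataFrame({
    'timestamp': pd.date_range('2023-01-01', periods=4, freq='s'),
    'depth_00': [1.0, 2.0, 3.0, 4.0],
    'density': [1025.00, 1025.10, 1025.02, 1025.20],  # density drops 0.08 kg/m³ at 3 m
})
flagged = Calculate.calculate_overturns(profile)
print(flagged[['depth_00', 'overturn']])  # only the 3 m row is True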
-1025     @staticmethod
-1026     def calculate_absolute_density(ctd_array):
-1027         """
-1028         Calculates absolute density from the CTD data using the TEOS-10 equations,
-1029         ensuring all data points are within the valid oceanographic funnel.
-1030 
-1031         Parameters
-1032         ----------
-1033         ctd_array : DataFrame
-1034             DataFrame containing salinity, temperature, and pressure data
-1035 
-1036         Returns
-1037         -------
-1038         Series
-1039             Series with calculated absolute density
-1040         """
-1041         SA = ctd_array['salinity_00']
-1042         t = ctd_array['temperature_00']
-1043         p = ctd_array['pressure_00']
-1044         CT = gsw.CT_from_t(SA, t, p)
-1045         if Calculate.gsw_infunnel(SA, CT, p).all():
-1046             return gsw.density.rho_t_exact(SA, t, p)
-1047         else:
-1048             raise CTDError("", "Sample not in funnel, could not calculate density.")
+1032     @staticmethod
+1033     def calculate_absolute_density(ctd_array):
+1034         """
+1035         Calculates absolute density from the CTD data using the TEOS-10 equations,
+1036         ensuring all data points are within the valid oceanographic funnel.
+1037 
+1038         Parameters
+1039         ----------
+1040         ctd_array : DataFrame
+1041             DataFrame containing salinity, temperature, and pressure data
+1042 
+1043         Returns
+1044         -------
+1045         Series
+1046             Series with calculated absolute density
+1047         """
+1048         SA = ctd_array['salinity_00']  # note: gsw's TEOS-10 routines expect Absolute Salinity (g/kg) here
+1049         t = ctd_array['temperature_00']
+1050         p = ctd_array['pressure_00']
+1051         CT = gsw.CT_from_t(SA, t, p)
+1052         if Calculate.gsw_infunnel(SA, CT, p).all():
+1053             return gsw.density.rho_t_exact(SA, t, p)
+1054         else:
+1055             raise CTDError("", "Sample not in funnel, could not calculate density.")
@@ -5133,27 +5164,27 @@ Returns
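The underlying TEOS-10 call can also be used directly. A sketch with scalar values typical of polar surface water (the numbers are illustrative); gsw.rho_t_exact takes Absolute Salinity in g/kg, in-situ temperature in °C, and sea pressure in dbar:

import gsw

SA = 34.0   # Absolute Salinity, g/kg
t = 1.5     # in-situ temperature, °C
p = 10.0    # sea pressure, dbar
rho = gsw.rho_t_exact(SA, t, p)
print(f"{float(rho):.3f} kg/m³")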
-1050     @staticmethod
-1051     def calculate_absolute_salinity(ctd_array):
-1052         """
-1053         Calculates absolute salinity from practical salinity, pressure,
-1054         and geographical coordinates using the TEOS-10 salinity conversion formulas.
-1055 
-1056         Parameters
-1057         ----------
-1058         ctd_array : DataFrame
-1059             DataFrame containing practical salinity, pressure, longitude, and latitude data
-1060 
-1061         Returns
-1062         -------
-1063         Series
-1064             Series with calculated absolute salinity
-1065         """
-1066         SP = ctd_array['salinity_00']
-1067         p = ctd_array['pressure_00']
-1068         lon = ctd_array['longitude']
-1069         lat = ctd_array['latitude']
-1070         return gsw.conversions.SA_from_SP(SP, p, lon, lat)
+1057     @staticmethod
+1058     def calculate_absolute_salinity(ctd_array):
+1059         """
+1060         Calculates absolute salinity from practical salinity, pressure,
+1061         and geographical coordinates using the TEOS-10 salinity conversion formulas.
+1062 
+1063         Parameters
+1064         ----------
+1065         ctd_array : DataFrame
+1066             DataFrame containing practical salinity, pressure, longitude, and latitude data
+1067 
+1068         Returns
+1069         -------
+1070         Series
+1071             Series with calculated absolute salinity
+1072         """
+1073         SP = ctd_array['salinity_00']
+1074         p = ctd_array['pressure_00']
+1075         lon = ctd_array['longitude']
+1076         lat = ctd_array['latitude']
+1077         return gsw.conversions.SA_from_SP(SP, p, lon, lat)
@@ -5188,65 +5219,65 @@ Returns
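The same conversion with scalar inputs; the coordinates below are illustrative Antarctic Peninsula values, since SA_from_SP applies a location-dependent salinity anomaly correction:

import gsw

SP = 33.8                  # Practical Salinity (PSS-78, unitless)
p = 25.0                   # sea pressure, dbar
lon, lat = -64.0, -64.8    # illustrative coordinates
SA = gsw.SA_from_SP(SP, p, lon, lat)
print(f"{float(SA):.4f} g/kg")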
-1072     @staticmethod
-1073     def calculate_mld(densities, depths, reference_depth, delta = 0.03):
-1074         """
-1075         Calculates the mixed layer depth (MLD) using the density threshold method.
-1076         MLD is the depth at which the density exceeds the reference density
-1077         by a predefined amount delta, which defaults to (0.03 kg/m³).
-1078 
-1079         Parameters
-1080         ----------
-1081         densities : Series
-1082             Series of densities
-1083         depths : Series
-1084             Series of depths corresponding to densities
-1085         reference_depth : float
-1086             The depth at which to anchor the reference density
-1087         delta : float, optional
-1088             The difference in density which would indicate the MLD, defaults to 0.03 kg/m.
-1089 
-1090         Returns
-1091         -------
-1092         tuple
-1093             A tuple containing the calculated MLD and the reference depth used to calculate MLD.
-1094         """
-1095         # Convert to numeric and ensure no NaNs remain
-1096         densities = densities.apply(pd.to_numeric, errors='coerce')
-1097         depths = depths.apply(pd.to_numeric, errors='coerce')
-1098         densities = densities.dropna(how='any').reset_index(drop=True)
-1099         depths = depths.dropna(how='any').reset_index(drop=True)
-1100         reference_depth = int(reference_depth)
-1101         if len(depths) == 0 or len(densities) == 0:
-1102             return None
-1103         sorted_data = sorted(zip(depths, densities), key=lambda x: x[0])
-1104         sorted_depths, sorted_densities = zip(*sorted_data)
-1105         # Determine reference density
-1106         reference_density = None
-1107         for i, depth in enumerate(sorted_depths):
-1108             if depth >= reference_depth:
-1109                 if depth == reference_depth:
-1110                     reference_density = sorted_densities[i]
-1111                     reference_depth = sorted_depths[i]
-1112                 else:
-1113                     # Linear interpolation
-1114                     try:
-1115                         reference_density = sorted_densities[i - 1] + (
-1116                                 (sorted_densities[i] - sorted_densities[i - 1]) * (
-1117                                 (reference_depth - sorted_depths[i - 1]) /
-1118                                 (sorted_depths[i] - sorted_depths[i - 1])))
-1119                     except:
-1120                         raise CTDError("",
-1121                                        f"Insufficient depth range to calculate MLD. "
-1122                                        f"Maximum sample depth is "f"{depths.max()}, minimum is {depths.min()}")
-1123                 break
-1124         if reference_density is None:
-1125             return None
-1126         # Find the depth where density exceeds the reference density by more than 0.05 kg/m³
-1127         for depth, density in zip(sorted_depths, sorted_densities):
-1128             if density > reference_density + delta and depth >= reference_depth:
-1129                 return depth, reference_depth
-1130         return None  # If no depth meets the criterion
+1079     @staticmethod
+1080     def calculate_mld(densities, depths, reference_depth, delta = 0.03):
+1081         """
+1082         Calculates the mixed layer depth (MLD) using the density threshold method.
+1083         MLD is the depth at which the density exceeds the reference density
+1084         by a predefined amount delta, which defaults to 0.03 kg/m³.
+1085 
+1086         Parameters
+1087         ----------
+1088         densities : Series
+1089             Series of densities
+1090         depths : Series
+1091             Series of depths corresponding to densities
+1092         reference_depth : float
+1093             The depth at which to anchor the reference density
+1094         delta : float, optional
+1095             The difference in density which would indicate the MLD, defaults to 0.03 kg/m³.
+1096 
+1097         Returns
+1098         -------
+1099         tuple
+1100             A tuple containing the calculated MLD and the reference depth used to calculate MLD, or None if no MLD is found.
+1101         """
+1102         # Convert to numeric and ensure no NaNs remain
+1103         densities = densities.apply(pd.to_numeric, errors='coerce')
+1104         depths = depths.apply(pd.to_numeric, errors='coerce')
+1105         densities = densities.dropna(how='any').reset_index(drop=True)
+1106         depths = depths.dropna(how='any').reset_index(drop=True)
+1107         reference_depth = int(reference_depth)
+1108         if len(depths) == 0 or len(densities) == 0:
+1109             return None
+1110         sorted_data = sorted(zip(depths, densities), key=lambda x: x[0])
+1111         sorted_depths, sorted_densities = zip(*sorted_data)
+1112         # Determine reference density
+1113         reference_density = None
+1114         for i, depth in enumerate(sorted_depths):
+1115             if depth >= reference_depth:
+1116                 if depth == reference_depth:
+1117                     reference_density = sorted_densities[i]
+1118                     reference_depth = sorted_depths[i]
+1119                 else:
+1120                     # Linear interpolation
+1121                     try:
+1122                         reference_density = sorted_densities[i - 1] + (
+1123                                 (sorted_densities[i] - sorted_densities[i - 1]) * (
+1124                                 (reference_depth - sorted_depths[i - 1]) /
+1125                                 (sorted_depths[i] - sorted_depths[i - 1])))
+1126                     except (IndexError, ZeroDivisionError):
+1127                         raise CTDError("",
+1128                                        f"Insufficient depth range to calculate MLD. "
+1129                                        f"Maximum sample depth is {depths.max()}, minimum is {depths.min()}")
+1130                 break
+1131         if reference_density is None:
+1132             return None
+1133         # Find the depth where density exceeds the reference density by more than delta
+1134         for depth, density in zip(sorted_depths, sorted_densities):
+1135             if density > reference_density + delta and depth >= reference_depth:
+1136                 return depth, reference_depth
+1137         return None  # If no depth meets the criterion
@@ -5264,7 +5295,7 @@ Parameters
 reference_depth (float):
 The depth at which to anchor the reference density
 delta (float, optional):
-The difference in density which would indicate the MLD, defaults to 0.03 kg/m.
+The difference in density which would indicate the MLD, defaults to 0.03 kg/m³.
 Returns
@@ -5288,68 +5319,68 @@ Returns
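A worked example of the threshold method, assuming Calculate is importable (import path illustrative): density is near-uniform down to 12 m and first exceeds the 5 m reference density by more than 0.03 kg/m³ at 15 m.

import pandas as pd
from CTDFjorder.CTDFjorder import Calculate  # illustrative import path

depths = pd.Series([1.0, 3.0, 5.0, 8.0, 10.0, 12.0, 15.0, 20.0])
densities = pd.Series([1025.00, 1025.00, 1025.01, 1025.01,
                       1025.02, 1025.02, 1025.10, 1025.30])
mld, ref = Calculate.calculate_mld(densities, depths, reference_depth=5)
print(mld, ref)  # 15.0 5.0 -- density first exceeds 1025.01 + 0.03 at 15 m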
-1132     @staticmethod
-1133     def calculate_mld_loess(densities, depths, reference_depth, delta = 0.03):
-1134         """
-1135         Calculates the mixed layer depth (MLD) using LOESS smoothing to first smooth the density profile and
-1136         then determine the depth where the smoothed density exceeds the reference density
-1137         by a predefined amount which defaults to 0.03 kg/m³.
-1138 
-1139         Parameters
-1140         ----------
-1141         densities : Series
-1142             Series of densities
-1143         depths : Series
-1144             Series of depths corresponding to densities
-1145         reference_depth :
-1146             The depth at which to anchor the reference density
-1147         delta : float, optional
-1148             The difference in density which would indicate the MLD, defaults to 0.03 kg/m.
-1149 
-1150         Returns
-1151         -------
-1152         tuple
-1153             A tuple containing the calculated MLD and the reference depth used to calculate MLD.
-1154         """
-1155         # Ensure input is pandas Series and drop NA values
-1156         if isinstance(densities, pd.Series) and isinstance(depths, pd.Series):
-1157             densities = densities.dropna().reset_index(drop=True)
-1158             depths = depths.dropna().reset_index(drop=True)
-1159 
-1160         # Convert to numeric and ensure no NaNs remain
-1161         densities = densities.apply(pd.to_numeric, errors='coerce')
-1162         depths = depths.apply(pd.to_numeric, errors='coerce')
-1163         densities = densities.dropna().reset_index(drop=True)
-1164         depths = depths.dropna().reset_index(drop=True)
-1165         if densities.empty or depths.empty:
-1166             return None, None
-1167 
-1168         # Convert pandas Series to numpy arrays for NumPy operations
-1169         densities = densities.to_numpy()
-1170         depths = depths.to_numpy()
-1171 
-1172         # Remove duplicates by averaging densities at the same depth
-1173         unique_depths, indices = np.unique(depths, return_inverse=True)
-1174         average_densities = np.zeros_like(unique_depths)
-1175         np.add.at(average_densities, indices, densities)
-1176         counts = np.zeros_like(unique_depths)
-1177         np.add.at(counts, indices, 1)
-1178         average_densities /= counts
-1179 
-1180         # Apply LOESS smoothing
-1181         lowess = statsmodels.api.nonparametric.lowess
-1182         smoothed = lowess(average_densities, unique_depths, frac=0.1)
-1183         smoothed_depths, smoothed_densities = zip(*smoothed)
-1184         reference_density = np.interp(reference_depth, smoothed_depths, smoothed_densities)
-1185 
-1186         # Find the depth where density exceeds the reference density by more than 0.05 kg/m³
-1187         exceeding_indices = np.where(np.array(smoothed_densities) > reference_density + delta
-1188                                      and np.array(smoothed_densities) > reference_depth)[0]
-1189         if exceeding_indices.size > 0:
-1190             mld_depth = smoothed_depths[exceeding_indices[0]]
-1191             return mld_depth, reference_depth
+1139     @staticmethod
+1140     def calculate_mld_loess(densities, depths, reference_depth, delta = 0.03):
+1141         """
+1142         Calculates the mixed layer depth (MLD) using LOESS smoothing to first smooth the density profile and
+1143         then determine the depth where the smoothed density exceeds the reference density
+1144         by a predefined amount which defaults to 0.03 kg/m³.
+1145 
+1146         Parameters
+1147         ----------
+1148         densities : Series
+1149             Series of densities
+1150         depths : Series
+1151             Series of depths corresponding to densities
+1152         reference_depth : float
+1153             The depth at which to anchor the reference density
+1154         delta : float, optional
+1155             The difference in density which would indicate the MLD, defaults to 0.03 kg/m³.
+1156 
+1157         Returns
+1158         -------
+1159         tuple
+1160             A tuple containing the calculated MLD and the reference depth, or (None, None) if no MLD is found.
+1161         """
+1162         # Ensure input is pandas Series and drop NA values
+1163         if isinstance(densities, pd.Series) and isinstance(depths, pd.Series):
+1164             densities = densities.dropna().reset_index(drop=True)
+1165             depths = depths.dropna().reset_index(drop=True)
+1166 
+1167         # Convert to numeric and ensure no NaNs remain
+1168         densities = densities.apply(pd.to_numeric, errors='coerce')
+1169         depths = depths.apply(pd.to_numeric, errors='coerce')
+1170         densities = densities.dropna().reset_index(drop=True)
+1171         depths = depths.dropna().reset_index(drop=True)
+1172         if densities.empty or depths.empty:
+1173             return None, None
+1174 
+1175         # Convert pandas Series to numpy arrays for NumPy operations
+1176         densities = densities.to_numpy()
+1177         depths = depths.to_numpy()
+1178 
+1179         # Remove duplicates by averaging densities at the same depth
+1180         unique_depths, indices = np.unique(depths, return_inverse=True)
+1181         average_densities = np.zeros_like(unique_depths)
+1182         np.add.at(average_densities, indices, densities)
+1183         counts = np.zeros_like(unique_depths)
+1184         np.add.at(counts, indices, 1)
+1185         average_densities /= counts
+1186 
+1187         # Apply LOESS smoothing
+1188         lowess = statsmodels.api.nonparametric.lowess
+1189         smoothed = lowess(average_densities, unique_depths, frac=0.1)
+1190         smoothed_depths, smoothed_densities = zip(*smoothed)
+1191         reference_density = np.interp(reference_depth, smoothed_depths, smoothed_densities)
 1192 
-1193         return None, None  # If no depth meets the criterion
+1193         # Find the depth where the smoothed density exceeds the reference density by more than delta
+1194         exceeding_indices = np.where((np.array(smoothed_densities) > reference_density + delta)
+1195                                      & (np.array(smoothed_depths) >= reference_depth))[0]
+1196         if exceeding_indices.size > 0:
+1197             mld_depth = smoothed_depths[exceeding_indices[0]]
+1198             return mld_depth, reference_depth
+1199 
+1200         return None, None  # If no depth meets the criterion
@@ -5390,49 +5421,49 @@ Returns
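The smoothing step in isolation: LOESS fits local regressions through the profile so single-sample noise cannot trip the threshold test. A sketch with synthetic data (the frac value and noise level are illustrative):

import numpy as np
import statsmodels.api as sm

depths = np.linspace(0, 30, 61)
rng = np.random.default_rng(0)
densities = 1025.0 + 0.02 * depths + rng.normal(0, 0.01, depths.size)

smoothed = sm.nonparametric.lowess(densities, depths, frac=0.3)
smoothed_depths, smoothed_densities = smoothed[:, 0], smoothed[:, 1]
print(smoothed_densities[:3])  # noise is largely averaged out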
-1195     @staticmethod
-1196     def calculate_mean_surface_density(df, range_):
-1197         """
-1198         Calculates the mean surface density from the CTD data, for a specified range or the entire dataset if the range is larger.
-1199 
-1200         Parameters
-1201         ----------
-1202         df : DataFrame
-1203             DataFrame containing density data.
-1204         range_ : tuple or int
-1205             Tuple indicating the (start, end) indices for the range of rows to be included in the calculation,
-1206             or an integer indicating the number of rows from the start.
-1207 
-1208         Returns
-1209         -------
-1210         float, None
-1211             Mean density value of the specified sample or None if unable to calculate.
-1212         """
-1213         min_depth = df.index.min()
-1214         max_depth = df.index.max()
-1215 
-1216         if isinstance(range_, tuple):
-1217             start, end = range_
-1218 
-1219             # Adjust 'start' to ensure it is within the valid range
-1220             start = max(start, min_depth)
-1221 
-1222             # Adjust 'end' to ensure it does not exceed the maximum depth value
-1223             end = min(end, max_depth)
-1224 
-1225             # Ensure start is less than end
-1226             if start <= end:
-1227                 return df.loc[start:end, 'density'].mean()
-1228             else:
-1229                 return None
-1230 
-1231         elif isinstance(range_, int):
-1232             # Use 'range_' as the number of rows from the start, adjust if it exceeds the DataFrame length
-1233             range_ = min(range_, len(df))
-1234             return df.iloc[:range_, df.columns.get_loc('density')].mean()
-1235 
-1236         else:
-1237             raise ValueError("Invalid range type. Must be tuple or int.")
+1202     @staticmethod
+1203     def calculate_mean_surface_density(df, range_):
+1204         """
+1205         Calculates the mean surface density from the CTD data, for a specified range of rows; the range is clipped to the bounds of the dataset if it is larger.
+1206 
+1207         Parameters
+1208         ----------
+1209         df : DataFrame
+1210             DataFrame containing density data.
+1211         range_ : tuple or int
+1212             Tuple indicating the (start, end) index labels for the range of rows to be included in the calculation,
+1213             or an integer indicating the number of rows from the start.
+1214 
+1215         Returns
+1216         -------
+1217         float or None
+1218             Mean density value of the specified sample or None if unable to calculate.
+1219         """
+1220         min_index = df.index.min()
+1221         max_index = df.index.max()
+1222 
+1223         if isinstance(range_, tuple):
+1224             start, end = range_
+1225 
+1226             # Adjust 'start' to ensure it is within the valid index range
+1227             start = max(start, min_index)
+1228 
+1229             # Adjust 'end' to ensure it does not exceed the maximum index value
+1230             end = min(end, max_index)
+1231 
+1232             # Ensure start is not greater than end
+1233             if start <= end:
+1234                 return df.loc[start:end, 'density'].mean()
+1235             else:
+1236                 return None
+1237 
+1238         elif isinstance(range_, int):
+1239             # Use 'range_' as the number of rows from the start, adjust if it exceeds the DataFrame length
+1240             range_ = min(range_, len(df))
+1241             return df.iloc[:range_, df.columns.get_loc('density')].mean()
+1242 
+1243         else:
+1244             raise ValueError("Invalid range type. Must be tuple or int.")
@@ -5469,20 +5500,20 @@ Returns
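Both accepted range forms, assuming Calculate is importable (import path illustrative): an int averages the first N rows, while a tuple is interpreted as index labels and clipped to the DataFrame's bounds.

import pandas as pd
from CTDFjorder.CTDFjorder import Calculate  # illustrative import path

profile = pd.DataFrame({'density': [1025.0, 1025.1, 1025.2, 1025.4, 1025.9]})
print(Calculate.calculate_mean_surface_density(profile, 3))       # first 3 rows -> 1025.1
print(Calculate.calculate_mean_surface_density(profile, (0, 2)))  # labels 0..2 -> 1025.1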
-1240 class CTDError(Exception):
-1241     """
-1242     Exception raised for CTD related errors.
-1243 
-1244     Parameters
-1245     ----------
-1246     filename: input dataset which caused the error
-1247     message: message -- explanation of the error
-1248     """
-1249 
-1250     def __init__(self, filename, message=" Unknown, check to make sure your mastersheet is in your current directory."):
-1251         self.filename = filename
-1252         self.message = message
-1253         super().__init__(self.message)
+1247 class CTDError(Exception):
+1248     """
+1249     Exception raised for CTD-related errors.
+1250 
+1251     Parameters
+1252     ----------
+1253     filename : input dataset which caused the error
+1254     message : explanation of the error
+1255     """
+1256 
+1257     def __init__(self, filename, message="Unknown, check to make sure your master sheet is in your current directory."):
+1258         self.filename = filename
+1259         self.message = message
+1260         super().__init__(self.message)
@@ -5507,10 +5538,10 @@ Parameters
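A sketch of catching the library's error type around a single processing step; the filename is hypothetical and the import path illustrative:

from CTDFjorder.CTDFjorder import CTD, CTDError  # illustrative import path

try:
    ctd = CTD("066248_20230721_1633.rsk")  # hypothetical RSK file
    ctd.add_location_to_table()
except CTDError as err:
    print(f"{err.filename}: {err.message}")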
-1256 def run_default(plot=False):
-1257     _reset_file_environment()
-1258     CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx")
-1259     rsk_files_list = get_rsk_filenames_in_dir(_get_cwd())
-1260     for file in rsk_files_list:
-1261         try:
-1262             my_data = CTD(file)
-1263             my_data.add_filename_to_table()
-1264             my_data.save_to_csv("output.csv")
-1265             my_data.add_location_to_table()
-1266             my_data.remove_non_positive_samples()
-1267             my_data.clean("practicalsalinity", 'salinitydiff')
-1268             my_data.add_absolute_salinity()
-1269             my_data.add_density()
-1270             my_data.add_overturns()
-1271             my_data.add_mld(1)
-1272             my_data.add_mld(5)
-1273             my_data.save_to_csv("outputclean.csv")
-1274             if plot:
-1275                 my_data.plot_depth_density_salinity_mld_scatter()
-1276                 my_data.plot_depth_temperature_scatter()
-1277                 my_data.plot_depth_salinity_density_mld_line()
-1278         except Exception as e:
-1279             print(f"Error processing file: '{file}' {e}")
-1280             continue
+1263 def run_default(plot=False):
+1264     _reset_file_environment()
+1265     CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx")
+1266     rsk_files_list = get_rsk_filenames_in_dir(_get_cwd())
+1267     for file in rsk_files_list:
+1268         try:
+1269             my_data = CTD(file)
+1270             my_data.add_filename_to_table()
+1271             my_data.save_to_csv("output.csv")
+1272             my_data.add_location_to_table()
+1273             my_data.remove_non_positive_samples()
+1274             my_data.clean("practicalsalinity", "salinitydiff")
+1275             my_data.add_absolute_salinity()
+1276             my_data.add_density()
+1277             my_data.add_overturns()
+1278             my_data.add_mld(1)
+1279             my_data.add_mld(5)
+1280             my_data.save_to_csv("outputclean.csv")
+1281             if plot:
+1282                 my_data.plot_depth_density_salinity_mld_scatter()
+1283                 my_data.plot_depth_temperature_scatter()
+1284                 my_data.plot_depth_salinity_density_mld_line()
+1285         except Exception as e:
+1286             print(f"Error processing file: '{file}' {e}")
+1287             continue
@@ -5604,18 +5635,18 @@ Inherited Members
-1283 def merge_all_in_folder():
-1284     CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx")
-1285     rsk_files_list = get_rsk_filenames_in_dir(_get_cwd())
-1286     for file in rsk_files_list:
-1287         try:
-1288             my_data = CTD(file)
-1289             my_data.add_filename_to_table()
-1290             my_data.add_location_to_table()
-1291             my_data.save_to_csv("output.csv")
-1292         except Exception as e:
-1293             print(e)
-1294             continue
+1290 def merge_all_in_folder():
+1291     CTD.master_sheet_path = os.path.join(_get_cwd(), "FjordPhyto MASTER SHEET.xlsx")
+1292     rsk_files_list = get_rsk_filenames_in_dir(_get_cwd())
+1293     for file in rsk_files_list:
+1294         try:
+1295             my_data = CTD(file)
+1296             my_data.add_filename_to_table()
+1297             my_data.add_location_to_table()
+1298             my_data.save_to_csv("output.csv")
+1299         except Exception as e:
+1300             print(e)
+1301             continue
@@ -5633,19 +5664,19 @@ Inherited Members
-1296 def get_rsk_filenames_in_dir(working_directory):
-1297     rsk_files_list = []
-1298     rsk_filenames_no_path = []
-1299     for filename in os.listdir(working_directory):
-1300         if filename.endswith('.rsk'):
-1301             for filepath in rsk_files_list:
-1302                 filename_no_path = ('_'.join(filepath.split("/")[-1].split("_")[0:3]).split('.rsk')[0])
-1303                 if filename_no_path in rsk_filenames_no_path:
-1304                     continue
-1305                 rsk_filenames_no_path.append(filename_no_path)
-1306             file_path = os.path.join(working_directory, filename)
-1307             rsk_files_list.append(file_path)
-1308     return rsk_files_list
+1303 def get_rsk_filenames_in_dir(working_directory):
+1304     rsk_files_list = []
+1305     rsk_filenames_no_path = []
+1306     for filename in os.listdir(working_directory):
+1307         if filename.endswith('.rsk'):
+1308             # Key on the first three underscore-separated fields of the filename
+1309             filename_no_path = ('_'.join(filename.split("_")[0:3]).split('.rsk')[0])
+1310             if filename_no_path in rsk_filenames_no_path:
+1311                 continue  # Skip files whose key has already been collected
+1312             rsk_filenames_no_path.append(filename_no_path)
+1313             file_path = os.path.join(working_directory, filename)
+1314             rsk_files_list.append(file_path)
+1315     return rsk_files_list
@@ -5663,64 +5694,64 @@ Inherited Members
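Typical use pairs the scan with the current working directory; a minimal sketch (import path illustrative):

import os
from CTDFjorder.CTDFjorder import get_rsk_filenames_in_dir  # illustrative import path

for path in get_rsk_filenames_in_dir(os.getcwd()):
    print(path)  # full path of each de-duplicated .rsk file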
-1349 def main():
-1350     run_default(True)
-1351     if len(sys.argv) < 2:
-1352         print("Usage: ctdfjorder <command> [arguments]")
-1353         print("Commands:")
-1354         print("  process <file>  Process a single RSK file")
-1355         print("  merge           Merge all RSK files in the current folder")
-1356         print("  default         Run the default processing pipeline")
-1357         sys.exit(1)
-1358 
-1359     command = sys.argv[1]
-1360 
-1361     if command == "process":
-1362         if len(sys.argv) < 3:
-1363             print("Usage: ctdfjorder process <file>")
-1364             sys.exit(1)
+1356 def main():
+1357     # Dispatch on the command given as the first CLI argument
+1358     if len(sys.argv) < 2:
+1359         print("Usage: ctdfjorder <command> [arguments]")
+1360         print("Commands:")
+1361         print("  process <file>  Process a single RSK file")
+1362         print("  merge           Merge all RSK files in the current folder")
+1363         print("  default         Run the default processing pipeline")
+1364         sys.exit(1)
 1365 
-1366     file_path = sys.argv[2]
-1367     try:
-1368         ctd = CTD(file_path)
-1369         ctd.add_filename_to_table()
-1370         ctd.save_to_csv("output.csv")
-1371         ctd.add_location_to_table()
-1372         ctd.remove_non_positive_samples()
-1373         ctd.clean("practicalsalinity", 'salinitydiff')
-1374         ctd.add_absolute_salinity()
-1375         ctd.add_density()
-1376         ctd.add_overturns()
-1377         ctd.add_mld(0)
-1378         ctd.add_mld(10)
-1379         ctd.save_to_csv("outputclean.csv")
-1380         print("Processing completed successfully.")
-1381     except Exception as e:
-1382         print(f"Error processing file: '{file_path}' {e}")
-1383 
-1384     elif command == "merge":
-1385         merge_all_in_folder()
-1386         print("Merging completed successfully.")
-1387 
-1388     elif command == "default":
-1389         run_default()
-1390         print("Default processing completed successfully.")
-1391 
-1392     elif command == "defaultplotall":
-1393         run_default(True)
-1394         print("Default processing completed successfully.")
-1395 
-1396     else:
-1397         print(f"Unknown command: {command}")
-1398         print("Usage: ctdfjorder <command> [arguments]")
-1399         print("Commands:")
-1400         print("  process <file>  Process a single RSK file")
-1401         print("  merge           Merge all RSK files in the current folder")
-1402         print("  default         Run the default processing pipeline")
-1403         print("  defaultplotall  Run the default processing pipeline and create plots")
-1404         print("CWD:")
-1405         print(_get_cwd())
-1406         sys.exit(1)
+1366     command = sys.argv[1]
+1367 
+1368     if command == "process":
+1369         if len(sys.argv) < 3:
+1370             print("Usage: ctdfjorder process <file>")
+1371             sys.exit(1)
+1372 
+1373         file_path = sys.argv[2]
+1374         try:
+1375             ctd = CTD(file_path)
+1376             ctd.add_filename_to_table()
+1377             ctd.save_to_csv("output.csv")
+1378             ctd.add_location_to_table()
+1379             ctd.remove_non_positive_samples()
+1380             ctd.clean("practicalsalinity", "salinitydiff")
+1381             ctd.add_absolute_salinity()
+1382             ctd.add_density()
+1383             ctd.add_overturns()
+1384             ctd.add_mld(0)
+1385             ctd.add_mld(10)
+1386             ctd.save_to_csv("outputclean.csv")
+1387             print("Processing completed successfully.")
+1388         except Exception as e:
+1389             print(f"Error processing file: '{file_path}' {e}")
+1390 
+1391     elif command == "merge":
+1392         merge_all_in_folder()
+1393         print("Merging completed successfully.")
+1394 
+1395     elif command == "default":
+1396         run_default()
+1397         print("Default processing completed successfully.")
+1398 
+1399     elif command == "defaultplotall":
+1400         run_default(True)
+1401         print("Default processing completed successfully.")
+1402 
+1403     else:
+1404         print(f"Unknown command: {command}")
+1405         print("Usage: ctdfjorder <command> [arguments]")
+1406         print("Commands:")
+1407         print("  process <file>  Process a single RSK file")
+1408         print("  merge           Merge all RSK files in the current folder")
+1409         print("  default         Run the default processing pipeline")
+1410         print("  defaultplotall  Run the default processing pipeline and create plots")
+1411         print("CWD:")
+1412         print(_get_cwd())
+1413         sys.exit(1)
@@ -5728,5 +5759,186 @@ Inherited Members
-