Skip to content

Commit

Permalink
Merge pull request #18 from EnvironmentalSystems/dev
Browse files Browse the repository at this point in the history
OOP Refactoring
  • Loading branch information
sjordan29 authored Jan 16, 2023
2 parents 2daed7e + 119ce9b commit b6f097f
Show file tree
Hide file tree
Showing 13 changed files with 7,400 additions and 2,356 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -179,4 +179,4 @@ examples/data/formatted_dser_header.txt
examples/data/formatted_qser_data.txt
examples/data/formatted_qser_data_B.txt
examples/data/formatted_qser_header.txt
examples/data/OhioRiver_m.p22.hdf
examples/data/OhioRiver_m.*.hdf
78 changes: 32 additions & 46 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -1,64 +1,50 @@
name: ras2d_wq
name: clearwater_riverine
channels:
- conda-forge
# - nodefaults
- defaults

dependencies:
- python =3.9*

# Pangeo
- scipy
- intake # Intake* installs most Pangeo libs
- intake-geopandas=0.8.0
- intake-parquet
- intake-xarray
- intake-thredds
- intake-stac # SpatioTemporal Asset Catalogs (STAC), https://stacspec.org
- intake-esm # Earth System Model (ESM) catalog, https://stacspec.org
- rasterio
- s3fs # Access to Amazon S3 filesystem
- cftime # Decodes time units and variable values for netCDF using CF conventions
- python =3.10
- scipy # installs numpy, pandas and most pyData libraries
- geopandas
- spatialpandas
- xarray
- cf_xarray # Interprets CF Convention attributes for Xarray objects
- gridgeo # Convert UGRID and SGRID compliant files to a variety of geo-like objects. https://github.com/pyoceans/gridgeo
- pytables >=3.7 # PyTables is optional, but required for Dask DataFrame.read_hdf()
- h5py
- hdf5plugin # HDF5 compression filters for h5py
- xlrd # read legacy Excel files (.xls)
- openpyxl # read/write Excel 2010+ files (.xlsx & .xlsm)
- gridgeo # Convert UGRID and SGRID compliant files to a variety of geo-like objects. https://github.com/pyoceans/gridgeo
- netCDF4
- zarr
- numcodecs # buffer compression and transformation codecs for use by zarr
- bottleneck # speeds up NaN-skipping and rolling window aggregations by a large factor
- fastparquet

# HoloViz, https://holoviz.org
- hvplot # hvPlot installs most HoloViz libs
- geoviews
# HoloViz optional libs used by bokeh.io to save plots to static files such as png.
# https://holoviews.org/user_guide/Plotting_with_Bokeh.html#exporting-static-files
- selenium # WebDriver for browser automation
  - firefox
  - geckodriver
# https://www.selenium.dev/documentation/webdriver/getting_started/install_drivers/
# - webdriver-manager # https://github.com/SergeyPirogov/webdriver_manager

# Interactivity & Visualization via Jupyter Notebooks
- jupyterlab
- nb_conda # Conda environment & package access extension from within Jupyter
- ipywidgets # Required for HoloViz interactivity
- seaborn

# Optional extension dependencies for JupyterLab
# Interactivity via Jupyter Notebooks
- jupyterlab
- nodejs # required for many extensions
- ipympl # jupyter-matplotlib, https://github.com/matplotlib/ipympl
# - qgrid # https://github.com/quantopian/qgrid
- ipywidgets # Required for HoloViz interactivity

# Dev tools (optional)
# - python-language-server
- jupyter-lsp-python # Includes both the server extension (jupyter-lsp) and pyls third-party server (python-language-server)
- jupyterlab-lsp # Docs at https://github.com/krassowski/jupyterlab-lsp
# Dev tools: Language Server Protocol (LSP) (Optional), for:
# code navigation + hover suggestions + linters + autocompletion + rename
- python-lsp-server # LSP extension for Python (pylsp), including:
# Rope for Completions and renaming
# Pyflakes linter to detect various errors
# McCabe linter for complexity checking
# pycodestyle linter for style checking
# pydocstyle linter for docstring style checking (disabled by default)
# autopep8 for code formatting
# YAPF for code formatting (preferred over autopep8)
# flake8 for error checking (disabled by default)
# pylint for code linting (disabled by default)
- pylsp-mypy # MyPy type checking for Python >=3.7.
- jupyterlab-lsp # Provides both server extension and lab extension

# package management
- conda
- conda-build

# PIP install requirements only if it is not possible with conda
# https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#using-pip-in-an-environment
- pip
- pip:
# - lckr-jupyterlab-variableinspector # https://github.com/lckr/jupyterlab-variableInspector
#turning off variableInspector in case it is the cause for slowness!
# - jupyterlab_hdf # https://github.com/jupyterlab/jupyterlab-hdf5
- conda-libmamba-solver # Faster env solver, https://conda.github.io/conda-libmamba-solver/
1,879 changes: 581 additions & 1,298 deletions examples/Ohio River.ipynb

Large diffs are not rendered by default.

5,492 changes: 5,492 additions & 0 deletions examples/dev_sandbox/hvplot_tests.ipynb

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion src/clearwater_riverine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@
__version__ = '0.1.0'

# populate package namespace
import clearwater_riverine
from clearwater_riverine import variables
from clearwater_riverine.io import hdf, inputs, outputs
from clearwater_riverine import mesh, utilities, linalg
from clearwater_riverine.ras2dwq import *
214 changes: 214 additions & 0 deletions src/clearwater_riverine/io/hdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
from typing import Dict, Any

import h5py
import xarray as xr
# import variables
import numpy as np
import pandas as pd
import datetime

from clearwater_riverine import variables

def _hdf_internal_paths(project_name):
    """Build the lookup of Clearwater Riverine variable names to the HDF5
    dataset paths that hold them in a HEC-RAS plan output file.

    Args:
        project_name: Name of the 2D flow area within the HDF file.

    Returns:
        dict mapping variable-name constants (and a few string keys) to
        their internal HDF5 paths.
    """
    # Common path prefixes, hoisted so each entry differs only in its leaf.
    geometry = f'Geometry/2D Flow Areas/{project_name}'
    timeseries = 'Results/Unsteady/Output/Output Blocks/Base Output/Unsteady Time Series'
    timeseries_area = f'{timeseries}/2D Flow Areas/{project_name}'
    return {
        variables.NODE_X: f'{geometry}/FacePoints Coordinate',
        variables.NODE_Y: f'{geometry}/FacePoints Coordinate',
        variables.TIME: f'{timeseries}/Time Date Stamp',
        variables.FACE_NODES: f'{geometry}/Cells FacePoint Indexes',
        variables.EDGE_NODES: f'{geometry}/Faces FacePoint Indexes',
        variables.EDGE_FACE_CONNECTIVITY: f'{geometry}/Faces Cell Indexes',
        variables.FACE_X: f'{geometry}/Cells Center Coordinate',
        variables.FACE_Y: f'{geometry}/Cells Center Coordinate',
        variables.FACE_SURFACE_AREA: f'{geometry}/Cells Surface Area',
        variables.EDGE_VELOCITY: f'{timeseries_area}/Face Velocity',
        variables.EDGE_LENGTH: f'{geometry}/Faces NormalUnitVector and Length',
        variables.WATER_SURFACE_ELEVATION: f'{timeseries_area}/Water Surface',
        variables.FLOW_ACROSS_FACE: f'{timeseries_area}/Face Flow',
        variables.VOLUME: f'{timeseries_area}/Cell Volume',
        'project_name': 'Geometry/2D Flow Areas/Attributes',
        'binary_time_stamps': f'{timeseries}/Time Date Stamp',
        'volume elevation info': f'{geometry}/Cells Volume Elevation Info',
        'volume_elevation_values': f'{geometry}/Cells Volume Elevation Values',
        'area_elevation_info': f'{geometry}/Faces Area Elevation Info',
        'area_elevation_values': f'{geometry}/Faces Area Elevation Values',
        'normalunitvector_length': f'{geometry}/Faces NormalUnitVector and Length',
        'boundary_condition_external_faces': 'Geometry/Boundary Condition Lines/External Faces',
        'boundary_condition_attributes': 'Geometry/Boundary Condition Lines/Attributes/',
    }

def _parse_attributes(dataset) -> Dict[str, Any]:
"""Parse the HDF5 attributes array, convert binary strings to Python strings, and return a dictionary of attributes"""
attrs = {}
for key, value in dataset.attrs.items():
if type(value) == np.bytes_:
attrs[key] = value.decode('ascii')
elif type(value) == np.ndarray:
values = []
for v in value:
if type(v) == np.bytes_:
values.append(v.decode('ascii'))
else:
values.append(v)
attrs[key] = values
else:
attrs[key] = value
return attrs

def _hdf_to_xarray(dataset, dims, attrs=None) -> xr.DataArray:
    """Read an n-dimensional HDF5 dataset and return it as an xarray.DataArray.

    Args:
        dataset: h5py dataset (anything supporting full-read via ``dataset[()]``).
        dims: Dimension name or sequence of dimension names for the DataArray.
        attrs: Optional attribute dict; when None the attributes are parsed
            from the dataset itself.

    Returns:
        xr.DataArray wrapping the dataset's values.
    """
    if attrs is None:  # identity check, not `== None` (PEP 8)
        attrs = _parse_attributes(dataset)
    return xr.DataArray(dataset[()], dims=dims, attrs=attrs)

def _hdf_to_dataframe(dataset) -> pd.DataFrame:
    """Read a table-like HDF5 dataset into a pandas DataFrame.

    Column names are taken from the dataset's 'Column' attribute.

    Args:
        dataset: h5py dataset with a 'Column' attribute listing column names.

    Returns:
        pd.DataFrame holding the dataset's records.
    """
    column_names = _parse_attributes(dataset)['Column']
    return pd.DataFrame(dataset[()], columns=column_names)

class HDFReader:
    """Reads RAS hydrodynamic data required for WQ calculations in the
    Clearwater Riverine model from a HEC-RAS plan HDF output file.

    May also be used as a context manager so the HDF file is always closed:

        with HDFReader(path) as reader:
            mesh = reader.define_coordinates(mesh)
    """

    def __init__(self, file_path: str) -> None:
        """Open the HDF file and read the information required to set up
        the model mesh.

        Args:
            file_path: Path to the HEC-RAS plan HDF output file.
        """
        self.file_path = file_path
        self.infile = h5py.File(file_path, 'r')
        # First record of the attributes table holds the 2D flow area
        # (project) name as bytes.
        self.project_name = self.infile['Geometry/2D Flow Areas/Attributes'][()][0][0].decode('UTF-8')
        self.paths = _hdf_internal_paths(self.project_name)

    def __enter__(self) -> 'HDFReader':
        """Context-manager entry: return the open reader."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Context-manager exit: always close the HDF file."""
        self.close()

    def define_coordinates(self, mesh: xr.Dataset):
        """Populate node x/y and time coordinates on the mesh.

        Args:
            mesh: UGRID-style xarray Dataset to annotate.

        Returns:
            The updated dataset (``assign_coords`` is not in-place).
        """
        # x-coordinates: column 0 of the FacePoints Coordinate table.
        mesh = mesh.assign_coords(
            node_x=xr.DataArray(
                data=self.infile[self.paths[variables.NODE_X]][()].T[0],
                dims=('node',),
            )
        )
        # y-coordinates: column 1 of the same table. (Fixed: previously
        # looked up the path via NODE_X; NODE_X and NODE_Y map to the same
        # HDF dataset so behavior is unchanged, but NODE_Y states intent.)
        mesh = mesh.assign_coords(
            node_y=xr.DataArray(
                data=self.infile[self.paths[variables.NODE_Y]][()].T[1],
                dims=('node',),
            )
        )
        # time: decode byte stamps (e.g. b'01JAN2023 00:00:00') to datetimes.
        time_stamps_binary = self.infile[self.paths['binary_time_stamps']][()]
        time_stamps = [x.decode("utf8") for x in time_stamps_binary]
        mesh = mesh.assign_coords(
            time=xr.DataArray(
                data=[datetime.datetime.strptime(x, '%d%b%Y %H:%M:%S') for x in time_stamps],
                dims=('time',),
            )
        )
        return mesh

    def define_topology(self, mesh: xr.Dataset):
        """Define the UGRID mesh topology: face-node, edge-node, and
        edge-face connectivity arrays (CF/UGRID attributes included).

        Args:
            mesh: UGRID-style xarray Dataset, mutated in place.
        """
        mesh[variables.FACE_NODES] = xr.DataArray(
            # Use the central path table rather than duplicating the
            # hard-coded HDF path string (same path, single source of truth).
            data=self.infile[self.paths[variables.FACE_NODES]][()],
            coords={
                "face_x": ('nface', self.infile[self.paths[variables.FACE_X]][()].T[0]),
                "face_y": ('nface', self.infile[self.paths[variables.FACE_Y]][()].T[1]),
            },
            dims=('nface', 'nmax_face'),
            attrs={
                'cf_role': 'face_node_connectivity',
                'long_name': 'Vertex nodes of mesh faces (counterclockwise)',
                'start_index': 0,
                '_FillValue': -1
            })
        mesh[variables.EDGE_NODES] = xr.DataArray(
            data=self.infile[self.paths[variables.EDGE_NODES]][()],
            dims=("nedge", '2'),
            attrs={
                'cf_role': 'edge_node_connectivity',
                'long_name': 'Vertex nodes of mesh edges',
                'start_index': 0
            })
        mesh[variables.EDGE_FACE_CONNECTIVITY] = xr.DataArray(
            data=self.infile[self.paths[variables.EDGE_FACE_CONNECTIVITY]][()],
            dims=("nedge", '2'),
            attrs={
                'cf_role': 'edge_face_connectivity',
                'long_name': 'neighbor faces for edges',
                'start_index': 0
            })

    def define_hydrodynamics(self, mesh: xr.Dataset):
        """Populate hydrodynamic data (areas, velocities, water surface,
        volumes, flows) in the UGRID-compliant xarray Dataset.

        Falls back to storing the raw elevation/area tables in ``mesh.attrs``
        when volume or face-flow outputs are absent from the HDF file, so
        they can be computed later.

        Args:
            mesh: UGRID-style xarray Dataset, mutated in place.
        """
        # Split edge-face connectivity into its two neighbor columns.
        # (Uses the same constant as define_topology instead of a string
        # literal; the literal and the constant name the same variable.)
        mesh[variables.EDGES_FACE1] = _hdf_to_xarray(
            mesh[variables.EDGE_FACE_CONNECTIVITY].T[0],
            ('nedge',),
            attrs={'Units': ''}
        )
        mesh[variables.EDGES_FACE2] = _hdf_to_xarray(
            mesh[variables.EDGE_FACE_CONNECTIVITY].T[1],
            ('nedge',),
            attrs={'Units': ''}
        )

        # Highest face index on the first-neighbor side = number of real
        # (non-ghost) cells.
        nreal = mesh[variables.EDGE_FACE_CONNECTIVITY].T[0].values.max()
        mesh.attrs[variables.NUMBER_OF_REAL_CELLS] = nreal

        mesh[variables.FACE_SURFACE_AREA] = _hdf_to_xarray(
            self.infile[self.paths[variables.FACE_SURFACE_AREA]],
            ('nface',)
        )
        mesh[variables.EDGE_VELOCITY] = _hdf_to_xarray(
            self.infile[self.paths[variables.EDGE_VELOCITY]],
            ('time', 'nedge'),
        )
        mesh[variables.EDGE_LENGTH] = _hdf_to_xarray(
            # Column 2 of "NormalUnitVector and Length" is the face length.
            self.infile[self.paths[variables.EDGE_LENGTH]][:, 2],
            ('nedge',),
            attrs={'Units': 'ft'}
        )
        mesh[variables.WATER_SURFACE_ELEVATION] = _hdf_to_xarray(
            self.infile[self.paths[variables.WATER_SURFACE_ELEVATION]],
            ('time', 'nface')
        )
        try:
            mesh[variables.VOLUME] = _hdf_to_xarray(
                self.infile[self.paths[variables.VOLUME]],
                ('time', 'nface')
            )
            # Zero out ghost-cell volumes beyond the real cells.
            # TODO(review): revisit this — confirm ghost cells should be 0.
            mesh[variables.VOLUME][:, mesh.attrs[variables.NUMBER_OF_REAL_CELLS]+1:] = 0
        except KeyError:
            # Volume output missing: stash the tables needed to compute it.
            mesh.attrs['volume_calculation_required'] = True
            mesh.attrs['face_volume_elevation_info'] = _hdf_to_dataframe(self.infile[self.paths['volume elevation info']])
            mesh.attrs['face_volume_elevation_values'] = _hdf_to_dataframe(self.infile[self.paths['volume_elevation_values']])
        try:
            mesh[variables.FLOW_ACROSS_FACE] = _hdf_to_xarray(
                self.infile[self.paths[variables.FLOW_ACROSS_FACE]],
                ('time', 'nedge')
            )
        except KeyError:
            # Narrowed from a bare `except:` — a missing HDF path raises
            # KeyError; other errors should propagate, not be swallowed.
            mesh.attrs['face_area_calculation_required'] = True
            mesh.attrs['face_area_elevation_info'] = _hdf_to_dataframe(self.infile[self.paths['area_elevation_info']])
            mesh.attrs['face_area_elevation_values'] = _hdf_to_dataframe(self.infile[self.paths['area_elevation_values']])
            mesh.attrs['face_normalunitvector_and_length'] = _hdf_to_dataframe(self.infile[self.paths['normalunitvector_length']])
            mesh.attrs['face_cell_indexes_df'] = _hdf_to_dataframe(self.infile[self.paths[variables.EDGE_FACE_CONNECTIVITY]])

    def define_boundary_hydrodynamics(self, mesh: xr.Dataset):
        """Read boundary-condition hydrodynamics and attach the merged
        table to ``mesh.attrs['boundary_data']``.

        Args:
            mesh: UGRID-style xarray Dataset, mutated in place.
        """
        external_faces = pd.DataFrame(self.infile[self.paths['boundary_condition_external_faces']][()])
        attributes = pd.DataFrame(self.infile[self.paths['boundary_condition_attributes']][()])
        # Decode every byte-string column to Python strings.
        str_df = attributes.select_dtypes([object])
        str_df = str_df.stack().str.decode('utf-8').unstack()
        for col in str_df:
            attributes[col] = str_df[col]
        boundary_attributes = attributes
        # Merge attributes with the per-face boundary condition data;
        # the attribute row index doubles as the BC line identifier.
        boundary_attributes['BC Line ID'] = boundary_attributes.index
        mesh.attrs['boundary_data'] = pd.merge(external_faces, boundary_attributes, on='BC Line ID', how='left')

    def close(self):
        """Close the HDF file."""
        self.infile.close()

Loading

0 comments on commit b6f097f

Please sign in to comment.