Commit

Merge branch 'main' into bugfix/1541-swy-qfi-bad-sum
dcdenu4 authored Apr 3, 2024
2 parents aa67a9c + 8139d2b commit 23b3354
Showing 16 changed files with 330 additions and 273 deletions.
21 changes: 14 additions & 7 deletions HISTORY.rst
@@ -39,6 +39,9 @@
Unreleased Changes
------------------
* General
* Validation now covers file paths contained in CSVs. CSV validation
will fail if the files listed in a CSV fail to validate.
https://github.com/natcap/invest/issues/327
* We have updated validation in several ways that will improve the
developer experience of working with InVEST models, and we hope will also
improve the user experience:
@@ -59,7 +62,6 @@ Unreleased Changes
versions of InVEST would skip these parameters' type-specific
validation. Now, these parameters will be validated with their
type-specific validation checks.

* Annual Water Yield
* Added the results_suffix to a few intermediate files where it was
missing. https://github.com/natcap/invest/issues/1517
@@ -73,6 +75,17 @@ Unreleased Changes
a block area and not summed pixelwise. This caused the quickflow
output ``QF.tif`` to have malformed values (see the sketch following
this changelog excerpt). https://github.com/natcap/invest/issues/1541
* SDR
* Fixed an issue encountered in the sediment deposition function where
rasters with more than 2^32 pixels would raise a cryptic error relating
to negative dimensions. https://github.com/natcap/invest/issues/1431
* Optimized the creation of the summary vector by minimizing the number of
times the target vector needs to be rasterized.
* Wind Energy
* Fixed a bug where some number inputs were not being properly cast to
``float`` or ``int`` types. If the inputs happened to be passed as
a ``str``, this caused unintended side effects such as a concatenation
error. (https://github.com/natcap/invest/issues/1498)
* Urban Nature Access
* Fixed a ``NameError`` that occurred when running the model using
search radii defined per population group with an exponential search
@@ -96,12 +109,6 @@ Unreleased Changes
* Fixed an issue where an LULC raster without a nodata value would
always raise an exception during reclassification.
https://github.com/natcap/invest/issues/1539
* SDR
* Fixed an issue encountered in the sediment deposition function where
rasters with more than 2^32 pixels would raise a cryptic error relating
to negative dimensions. https://github.com/natcap/invest/issues/1431
* Optimized the creation of the summary vector by minimizing the number of
times the target vector needs to be rasterized.

3.14.1 (2023-12-18)
-------------------
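[Note on the SWY quickflow entry above: a minimal numpy sketch of the pixelwise summation the fix describes. Function and variable names here are hypothetical, not the model's actual code.]

import numpy as np

def sum_quickflow(qf_blocks, nodata=-1.0):
    # Accumulate quickflow pixel by pixel across raster blocks, rather
    # than scaling a single per-block value by the block's area.
    total = 0.0
    for block in qf_blocks:
        total += float(np.sum(block[block != nodata]))
    return total

# Two 2x2 blocks, one nodata pixel: 3.5 + 2.0 = 5.5
blocks = [np.array([[1.0, 2.0], [0.5, -1.0]]),
          np.array([[0.25, 0.25], [0.0, 1.5]])]
print(sum_quickflow(blocks))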
2 changes: 1 addition & 1 deletion Makefile
@@ -6,7 +6,7 @@ GIT_SAMPLE_DATA_REPO_REV := 2e7cd618c661ec3f3b2a3bddfd2ce7d4704abc05

GIT_TEST_DATA_REPO := https://bitbucket.org/natcap/invest-test-data.git
GIT_TEST_DATA_REPO_PATH := $(DATA_DIR)/invest-test-data
GIT_TEST_DATA_REPO_REV := da013683e80ea094fbb2309197e2488c02794da8
GIT_TEST_DATA_REPO_REV := 324abde73e1d770ad75921466ecafd1ec6297752

GIT_UG_REPO := https://github.com/natcap/invest.users-guide
GIT_UG_REPO_PATH := doc/users-guide
13 changes: 0 additions & 13 deletions src/natcap/invest/coastal_blue_carbon/coastal_blue_carbon.py
@@ -116,8 +116,6 @@
INVALID_ANALYSIS_YEAR_MSG = gettext(
"Analysis year {analysis_year} must be >= the latest snapshot year "
"({latest_year})")
INVALID_SNAPSHOT_RASTER_MSG = gettext(
"Raster for snapshot {snapshot_year} could not be validated.")
INVALID_TRANSITION_VALUES_MSG = gettext(
"The transition table expects values of {model_transitions} but found "
"values of {transition_values}.")
@@ -2166,7 +2164,6 @@ def validate(args, limit_to=None):
"""
validation_warnings = validation.validate(
args, MODEL_SPEC['args'])

sufficient_keys = validation.get_sufficient_keys(args)
invalid_keys = validation.get_invalid_keys(validation_warnings)

@@ -2177,16 +2174,6 @@
**MODEL_SPEC['args']['landcover_snapshot_csv']
)['raster_path'].to_dict()

for snapshot_year, snapshot_raster_path in snapshots.items():
raster_error_message = validation.check_raster(
snapshot_raster_path)
if raster_error_message:
validation_warnings.append((
['landcover_snapshot_csv'],
INVALID_SNAPSHOT_RASTER_MSG.format(
snapshot_year=snapshot_year
) + ' ' + raster_error_message))

if ("analysis_year" not in invalid_keys
and "analysis_year" in sufficient_keys):
if max(set(snapshots.keys())) > int(args['analysis_year']):
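[The per-snapshot raster loop removed above is superseded by the CSV column validation added to validation.py later in this commit. A hedged sketch of the new flow; the spec dict is illustrative, not the model's actual MODEL_SPEC entry:]

from natcap.invest import validation

snapshot_csv_spec = {  # illustrative stand-in for MODEL_SPEC['args']['landcover_snapshot_csv']
    'type': 'csv',
    'index_col': 'snapshot_year',
    'columns': {
        'snapshot_year': {'type': 'integer'},
        'raster_path': {'type': 'raster', 'bands': {1: {'type': 'integer'}}},
    },
}

# get_validated_dataframe now validates each raster_path with check_raster
# and raises a ValueError naming the bad row, so no per-year loop is needed.
snapshots = validation.get_validated_dataframe(
    'landcover_snapshots.csv', **snapshot_csv_spec)['raster_path'].to_dict()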
34 changes: 1 addition & 33 deletions src/natcap/invest/coastal_vulnerability.py
@@ -240,7 +240,7 @@
"type": "freestyle_string",
"about": gettext("Unique name for the habitat. No spaces allowed.")},
"path": {
"type": {"vector", "raster"},
"type": {"raster", "vector"},
"fields": {},
"geometries": {"POLYGON", "MULTIPOLYGON"},
"bands": {1: {"type": "number", "units": u.none}},
@@ -771,8 +771,6 @@ def execute(args):
None
"""
_validate_habitat_table_paths(args['habitat_table_path'])

output_dir = os.path.join(args['workspace_dir'])
intermediate_dir = os.path.join(
args['workspace_dir'], 'intermediate')
@@ -3450,36 +3448,6 @@ def logger_callback(proportion_complete):
return logger_callback


def _validate_habitat_table_paths(habitat_table_path):
"""Validate paths to vectors within the habitat CSV can be opened.
Args:
habitat_table_path (str): typically args['habitat_table_path']
Returns:
None
Raises:
ValueError if any vector in the ``path`` column cannot be opened.
"""
habitat_dataframe = validation.get_validated_dataframe(
habitat_table_path, **MODEL_SPEC['args']['habitat_table_path'])
bad_paths = []
for habitat_row in habitat_dataframe.itertuples():
try:
gis_type = pygeoprocessing.get_gis_type(habitat_row.path)
if not gis_type:
# Treating an unknown GIS type the same as a bad filepath
bad_paths.append(habitat_row.path)
except ValueError:
bad_paths.append(habitat_row.path)

if bad_paths:
raise ValueError(
f'Could not open these datasets referenced in {habitat_table_path}:'
+ ' | '.join(bad_paths))


@validation.invest_validator
def validate(args, limit_to=None):
"""Validate args to ensure they conform to ``execute``'s contract.
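[For reference, the GIS-type dispatch idiom the removed helper used; the new check_raster_or_vector in validation.py (below) centralizes it. The path is made up, and UNKNOWN_TYPE is assumed from pygeoprocessing's module constants:]

import pygeoprocessing

try:
    gis_type = pygeoprocessing.get_gis_type('habitat/kelp.shp')
except ValueError:
    gis_type = pygeoprocessing.UNKNOWN_TYPE  # path missing or unreadable

if gis_type == pygeoprocessing.RASTER_TYPE:
    print('validate as a raster')
elif gis_type == pygeoprocessing.VECTOR_TYPE:
    print('validate as a vector')
else:
    print('not a GDAL-readable dataset')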
22 changes: 6 additions & 16 deletions src/natcap/invest/habitat_quality.py
@@ -1040,23 +1040,13 @@ def _validate_threat_path(threat_path, lulc_key):
"""
# Checking threat path exists to control custom error messages
# for user readability.
try:
threat_gis_type = pygeoprocessing.get_gis_type(threat_path)
if threat_gis_type != pygeoprocessing.RASTER_TYPE:
# Raise a value error with custom message to help users
# debug threat raster issues
if lulc_key != '_b':
return "error"
# it's OK to have no threat raster w/ baseline scenario
else:
return None
else:
return threat_path
except ValueError:
if lulc_key != '_b':
return "error"
else:
if threat_path:
return threat_path
else:
if lulc_key == '_b':
return None
else:
return 'error'


@validation.invest_validator
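[A behavior sketch of the simplified helper, assuming threat paths arrive as possibly-empty strings; the file name is made up:]

# Any provided path is returned unchanged for later raster validation.
assert _validate_threat_path('threats/crops_c.tif', '_c') == 'threats/crops_c.tif'
# A missing threat raster is only acceptable in the baseline ('_b') scenario.
assert _validate_threat_path('', '_b') is None
assert _validate_threat_path('', '_c') == 'error'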
8 changes: 5 additions & 3 deletions src/natcap/invest/hra.py
@@ -70,7 +70,7 @@
"names must match the habitat and stressor names in "
"the Criteria Scores Table.")},
"path": {
"type": {"vector", "raster"},
"type": {"raster", "vector"},
"bands": {1: {
"type": "number",
"units": u.none,
@@ -80,13 +80,13 @@
"values besides 0 or 1 will be treated as 0.")
}},
"fields": {},
"geometries": spec_utils.POLYGONS,
"geometries": spec_utils.ALL_GEOMS,
"about": gettext(
"Map of where the habitat or stressor exists. For "
"rasters, a pixel value of 1 indicates presence of "
"the habitat or stressor. 0 (or any other value) "
"indicates absence of the habitat or stressor. For "
"vectors, a polygon indicates an area where the "
"vectors, a geometry indicates an area where the "
"habitat or stressor is present.")
},
"type": {
@@ -1785,6 +1785,8 @@ def _parse_info_table(info_table_path):
except ValueError as err:
if 'Index has duplicate keys' in str(err):
raise ValueError("Habitat and stressor names may not overlap.")
else:
raise err

table = table.rename(columns={'stressor buffer (meters)': 'buffer'})

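[The 'Index has duplicate keys' ValueError re-raised above typically originates from pandas' set_index(..., verify_integrity=True) when the same name is used for both a habitat and a stressor. A standalone sketch with made-up data:]

import pandas as pd

table = pd.DataFrame({
    'name': ['eelgrass', 'eelgrass'],
    'path': ['eelgrass.shp', 'eelgrass_stressor.shp'],
})
try:
    table = table.set_index('name', verify_integrity=True)
except ValueError as err:
    if 'Index has duplicate keys' in str(err):
        raise ValueError('Habitat and stressor names may not overlap.')
    else:
        raise err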
51 changes: 46 additions & 5 deletions src/natcap/invest/validation.py
@@ -424,6 +424,27 @@ def check_vector(filepath, geometries, fields=None, projected=False,
return projection_warning


def check_raster_or_vector(filepath, **kwargs):
"""Validate an input that may be a raster or vector.
Args:
filepath (string): The path to the raster or vector.
**kwargs: kwargs of the raster and vector spec. Will be
passed to ``check_raster`` or ``check_vector``.
Returns:
A string error message if an error was found. ``None`` otherwise.
"""
try:
gis_type = pygeoprocessing.get_gis_type(filepath)
except ValueError as err:
return str(err)
if gis_type == pygeoprocessing.RASTER_TYPE:
return check_raster(filepath, **kwargs)
else:
return check_vector(filepath, **kwargs)


def check_freestyle_string(value, regexp=None, **kwargs):
"""Validate an arbitrary string.
@@ -639,7 +660,7 @@ def get_validated_dataframe(
for col in matching_cols:
try:
# frozenset needed to make the set hashable. A frozenset and set with the same members are equal.
if col_spec['type'] in {'csv', 'directory', 'file', 'raster', 'vector', frozenset({'vector', 'raster'})}:
if col_spec['type'] in {'csv', 'directory', 'file', 'raster', 'vector', frozenset({'raster', 'vector'})}:
df[col] = df[col].apply(
lambda p: p if pandas.isna(p) else utils.expand_path(str(p).strip(), csv_path))
df[col] = df[col].astype(pandas.StringDtype())
Expand All @@ -660,6 +681,20 @@ def get_validated_dataframe(
f'Value(s) in the "{col}" column could not be interpreted '
f'as {col_spec["type"]}s. Original error: {err}')

col_type = col_spec['type']
if isinstance(col_type, set):
col_type = frozenset(col_type)
if col_type in {'raster', 'vector', frozenset({'raster', 'vector'})}:
# recursively validate the files within the column
def check_value(value):
if pandas.isna(value):
return
err_msg = _VALIDATION_FUNCS[col_type](value, **col_spec)
if err_msg:
raise ValueError(
f'Error in {axis} "{col}", value "{value}": {err_msg}')
df[col].apply(check_value)

if any(df.columns.duplicated()):
duplicated_columns = df.columns[df.columns.duplicated()]
return MESSAGES['DUPLICATE_HEADER'].format(
@@ -881,6 +916,7 @@ def get_headers_to_validate(spec):
'option_string': check_option_string,
'raster': functools.partial(timeout, check_raster),
'vector': functools.partial(timeout, check_vector),
frozenset({'raster', 'vector'}): functools.partial(timeout, check_raster_or_vector),
'other': None, # Up to the user to define their validate()
}

@@ -965,13 +1001,16 @@ def validate(args, spec, spatial_overlap_opts=None):
LOGGER.debug(f'Provided key {key} does not exist in MODEL_SPEC')
continue

param_type = parameter_spec['type']
if isinstance(param_type, set):
param_type = frozenset(param_type)
# rewrite parameter_spec for any nested, conditional validity
axis_keys = None
if parameter_spec['type'] == 'csv':
if param_type == 'csv':
axis_keys = ['columns', 'rows']
elif parameter_spec['type'] == 'vector':
elif param_type == 'vector' or 'vector' in param_type:
axis_keys = ['fields']
elif parameter_spec['type'] == 'directory':
elif param_type == 'directory':
axis_keys = ['contents']

if axis_keys:
@@ -985,7 +1024,7 @@
bool(_evaluate_expression(
nested_spec['required'], expression_values)))

type_validation_func = _VALIDATION_FUNCS[parameter_spec['type']]
type_validation_func = _VALIDATION_FUNCS[param_type]

if type_validation_func is None:
# Validation for 'other' type must be performed by the user.
@@ -1127,6 +1166,8 @@ def _wrapped_validate_func(args, limit_to=None):
# need to validate it.
if args_value not in ('', None):
input_type = args_key_spec['type']
if isinstance(input_type, set):
input_type = frozenset(input_type)
validator_func = _VALIDATION_FUNCS[input_type]
error_msg = validator_func(args_value, **args_key_spec)

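[Why the repeated set-to-frozenset normalization above: dict keys must be hashable, and a plain set is not. A minimal demonstration:]

funcs = {}
try:
    funcs[{'raster', 'vector'}] = 'check_raster_or_vector'
except TypeError as err:
    print(err)  # unhashable type: 'set'

funcs[frozenset({'raster', 'vector'})] = 'check_raster_or_vector'
# Frozensets with equal members hash equally, so either spelling matches:
assert frozenset({'vector', 'raster'}) in funcs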
25 changes: 13 additions & 12 deletions src/natcap/invest/wind_energy.py
@@ -1289,10 +1289,15 @@ def execute(args):
levelized_raster_path = os.path.join(
out_dir, 'levelized_cost_price_per_kWh%s.tif' % suffix)

# Include foundation_cost, discount_rate, and number_of_turbines in
# parameters_dict to pass along for the NPV calculation
for key in ['foundation_cost', 'discount_rate', 'number_of_turbines']:
parameters_dict[key] = float(args[key])

task_graph.add_task(
func=_calculate_npv_levelized_rasters,
args=(harvested_masked_path, final_dist_raster_path, npv_raster_path,
levelized_raster_path, parameters_dict, args, price_list),
levelized_raster_path, parameters_dict, price_list),
target_path_list=[npv_raster_path, levelized_raster_path],
task_name='calculate_npv_levelized_rasters',
dependent_task_list=[final_dist_task])
@@ -1321,7 +1326,7 @@
def _calculate_npv_levelized_rasters(
base_harvested_raster_path, base_dist_raster_path,
target_npv_raster_path, target_levelized_raster_path,
parameters_dict, args, price_list):
parameters_dict, price_list):
"""Calculate NPV and levelized rasters from harvested and dist rasters.
Args:
@@ -1341,9 +1346,6 @@
parameters_dict (dict): a dictionary of the turbine and biophysical
global parameters.
args (dict): a dictionary that contains information on
``foundation_cost``, ``discount_rate``, ``number_of_turbines``.
price_list (list): a list of wind energy prices for a period of time.
@@ -1375,7 +1377,7 @@
# The cost of infield cable in currency units per km
infield_cost = parameters_dict['infield_cable_cost']
# The cost of the foundation in currency units
foundation_cost = args['foundation_cost']
foundation_cost = parameters_dict['foundation_cost']
# The cost of each turbine unit in currency units
unit_cost = parameters_dict['turbine_cost']
# The installation cost as a decimal
@@ -1385,7 +1387,7 @@
# The operations and maintenance costs as a decimal factor of capex_arr
op_maint_cost = parameters_dict['operation_maintenance_cost']
# The discount rate as a decimal
discount_rate = args['discount_rate']
discount_rate = parameters_dict['discount_rate']
# The cost to decommission the farm as a decimal factor of capex_arr
decom = parameters_dict['decommission_cost']
# The mega watt value for the turbines in MW
@@ -1401,16 +1403,15 @@

# The total mega watt capacity of the wind farm where mega watt is the
# turbines rated power
total_mega_watt = mega_watt * int(args['number_of_turbines'])
number_of_turbines = int(parameters_dict['number_of_turbines'])
total_mega_watt = mega_watt * number_of_turbines

# Total infield cable cost
infield_cable_cost = infield_length * infield_cost * int(
args['number_of_turbines'])
infield_cable_cost = infield_length * infield_cost * number_of_turbines
LOGGER.debug('infield_cable_cost : %s', infield_cable_cost)

# Total foundation cost
total_foundation_cost = (foundation_cost + unit_cost) * int(
args['number_of_turbines'])
total_foundation_cost = (foundation_cost + unit_cost) * number_of_turbines
LOGGER.debug('total_foundation_cost : %s', total_foundation_cost)

# Nominal Capital Cost (CAP) minus the cost of cable which needs distances
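[A toy illustration of the Wind Energy casting fix, with made-up numbers: arithmetic on uncast str inputs either concatenates or repeats silently, so the values are now converted once, up front:]

args = {'foundation_cost': '2500000', 'number_of_turbines': '80'}

# Without the cast, '+' concatenates and '*' repeats:
print(args['foundation_cost'] + '100')  # '2500000100', not 2500100
print(args['number_of_turbines'] * 2)   # '8080', not 160

# With the fix, numeric types are enforced before any arithmetic:
foundation_cost = float(args['foundation_cost'])
number_of_turbines = int(args['number_of_turbines'])
print(foundation_cost * number_of_turbines)  # 200000000.0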
