From 74cc459920921a65e28270726b05a84300d3ddd9 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Tue, 17 Oct 2023 14:45:33 -0700 Subject: [PATCH 1/5] add geometry type check to vector validation #1374 --- src/natcap/invest/validation.py | 28 ++++++++++++++++++++--- tests/test_validation.py | 39 ++++++++++++++++++++++++++------- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 719c50601e..403664b5c1 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -17,6 +17,7 @@ import pygeoprocessing from osgeo import gdal from osgeo import osr +from osgeo import ogr from . import gettext from . import spec_utils @@ -58,6 +59,7 @@ 'BBOX_NOT_INTERSECT': gettext('Not all of the spatial layers overlap each ' 'other. All bounding boxes must intersect: {bboxes}'), 'NEED_PERMISSION': gettext('You must have {permission} access to this file'), + 'WRONG_GEOM_TYPE': gettext('Geometry type must be one of {allowed}') } @@ -335,8 +337,8 @@ def load_fields_from_vector(filepath, layer_id=0): return fieldnames -def check_vector(filepath, fields=None, projected=False, projection_units=None, - **kwargs): +def check_vector(filepath, geometries, fields=None, projected=False, + projection_units=None, **kwargs): """Validate a GDAL vector on disk. Note: @@ -366,11 +368,30 @@ def check_vector(filepath, fields=None, projected=False, projection_units=None, gdal_dataset = gdal.OpenEx(filepath, gdal.OF_VECTOR) gdal.PopErrorHandler() + geom_map = { + 'POINT': [ogr.wkbPoint, ogr.wkbPointM, ogr.wkbPointZM, ogr.wkbPoint25D], + 'LINESTRING': [ogr.wkbLineString, ogr.wkbLineStringM, + ogr.wkbLineStringZM, ogr.wkbLineString25D], + 'POLYGON': [ogr.wkbPolygon, ogr.wkbPolygonM, + ogr.wkbPolygonZM, ogr.wkbPolygon25D], + 'MULTIPOINT': [ogr.wkbMultiPoint, ogr.wkbMultiPointM, + ogr.wkbMultiPointZM, ogr.wkbMultiPoint25D], + 'MULTILINESTRING': [ogr.wkbMultiLineString, ogr.wkbMultiLineStringM, + ogr.wkbMultiLineStringZM, ogr.wkbMultiLineString25D], + 'MULTIPOLYGON': [ogr.wkbMultiPolygon, ogr.wkbMultiPolygonM, + ogr.wkbMultiPolygonZM, ogr.wkbMultiPolygon25D] + } + + allowed_geom_types = [] + for geom in geometries: + allowed_geom_types += geom_map[geom] + if gdal_dataset is None: return MESSAGES['NOT_GDAL_VECTOR'] layer = gdal_dataset.GetLayer() - srs = layer.GetSpatialRef() + if layer.GetGeomType() not in allowed_geom_types: + return MESSAGES['WRONG_GEOM_TYPE'].format(allowed=geometries) if fields: field_patterns = get_headers_to_validate(fields) @@ -380,6 +401,7 @@ def check_vector(filepath, fields=None, projected=False, projection_units=None, if required_field_warning: return required_field_warning + srs = layer.GetSpatialRef() projection_warning = _check_projection(srs, projected, projection_units) return projection_warning diff --git a/tests/test_validation.py b/tests/test_validation.py index 182cafdcdd..8f9e10acf8 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -458,7 +458,7 @@ def test_file_not_found(self): from natcap.invest import validation filepath = os.path.join(self.workspace_dir, 'file.txt') - error_msg = validation.check_vector(filepath) + error_msg = validation.check_vector(filepath, geometries={'POINT'}) self.assertEqual(error_msg, validation.MESSAGES['FILE_NOT_FOUND']) def test_invalid_vector(self): @@ -469,7 +469,7 @@ def test_invalid_vector(self): with open(filepath, 'w') as bad_vector: bad_vector.write('not a vector') - error_msg = validation.check_vector(filepath) + error_msg = validation.check_vector(filepath, geometries={'POINT'}) self.assertEqual(error_msg, validation.MESSAGES['NOT_GDAL_VECTOR']) def test_missing_fieldnames(self): @@ -481,7 +481,7 @@ def test_missing_fieldnames(self): vector = driver.Create(filepath, 0, 0, 0, gdal.GDT_Unknown) wgs84_srs = osr.SpatialReference() wgs84_srs.ImportFromEPSG(4326) - layer = vector.CreateLayer('sample_layer', wgs84_srs, ogr.wkbUnknown) + layer = vector.CreateLayer('sample_layer', wgs84_srs, ogr.wkbPoint) for field_name, field_type in (('COL_A', ogr.OFTInteger), ('col_b', ogr.OFTString)): @@ -497,7 +497,8 @@ def test_missing_fieldnames(self): vector = None error_msg = validation.check_vector( - filepath, fields={'col_a': {}, 'col_b': {}, 'col_c': {}}) + filepath, geometries={'POINT'}, + fields={'col_a': {}, 'col_b': {}, 'col_c': {}}) expected = validation.MESSAGES['MATCHED_NO_HEADERS'].format( header='field', header_name='col_c') self.assertEqual(error_msg, expected) @@ -511,19 +512,39 @@ def test_vector_projected_in_m(self): vector = driver.Create(filepath, 0, 0, 0, gdal.GDT_Unknown) meters_srs = osr.SpatialReference() meters_srs.ImportFromEPSG(32731) - layer = vector.CreateLayer('sample_layer', meters_srs, ogr.wkbUnknown) + layer = vector.CreateLayer('sample_layer', meters_srs, ogr.wkbPoint) layer = None vector = None error_msg = validation.check_vector( - filepath, projected=True, projection_units=spec_utils.u.foot) + filepath, geometries={'POINT'}, projected=True, + projection_units=spec_utils.u.foot) expected_msg = validation.MESSAGES['WRONG_PROJECTION_UNIT'].format( unit_a='foot', unit_b='metre') self.assertEqual(error_msg, expected_msg) self.assertEqual(None, validation.check_vector( - filepath, projected=True, projection_units=spec_utils.u.meter)) + filepath, geometries={'POINT'}, projected=True, + projection_units=spec_utils.u.meter)) + + def test_wrong_geom_type(self): + """Validation: checks that the vector's geometry type is correct.""" + from natcap.invest import spec_utils, validation + driver = gdal.GetDriverByName('GPKG') + filepath = os.path.join(self.workspace_dir, 'vector.gpkg') + vector = driver.Create(filepath, 0, 0, 0, gdal.GDT_Unknown) + meters_srs = osr.SpatialReference() + meters_srs.ImportFromEPSG(32731) + layer = vector.CreateLayer('sample_layer', meters_srs, ogr.wkbPoint) + layer = None + vector = None + self.assertEqual( + validation.check_vector(filepath, geometries={'POLYGON', 'POINT'}), + None) + self.assertEqual( + validation.check_vector(filepath, geometries={'MULTIPOINT'}), + validation.MESSAGES['WRONG_GEOM_TYPE'].format(allowed={'MULTIPOINT'})) class FreestyleStringValidation(unittest.TestCase): @@ -1172,7 +1193,8 @@ def test_spatial_overlap_error(self): 'name': 'vector 1', 'about': 'vector 1', 'required': True, - 'fields': {} + 'fields': {}, + 'geometries': {'POINT'} } } @@ -1287,6 +1309,7 @@ def test_spatial_overlap_error_optional_args(self): 'name': 'vector 1', 'about': 'vector 1', 'required': False, + 'geometries': {'POINT'} } } From 0a9c9c629e2151f1fa61f85569c91b6f94f1d546 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Tue, 17 Oct 2023 17:02:18 -0700 Subject: [PATCH 2/5] remove redundant validation and tests #1374 --- src/natcap/invest/coastal_vulnerability.py | 28 ---------- tests/test_coastal_vulnerability.py | 59 ---------------------- tests/test_wave_energy.py | 31 ------------ 3 files changed, 118 deletions(-) diff --git a/src/natcap/invest/coastal_vulnerability.py b/src/natcap/invest/coastal_vulnerability.py index e9d3b6c109..dcfcfab1c4 100644 --- a/src/natcap/invest/coastal_vulnerability.py +++ b/src/natcap/invest/coastal_vulnerability.py @@ -3507,34 +3507,6 @@ def validate(args, limit_to=None): invalid_keys = validation.get_invalid_keys(validation_warnings) sufficient_keys = validation.get_sufficient_keys(args) - if 'shelf_contour_vector_path' not in invalid_keys: - # We can assume that this vector can already be opened because it - # passed validation so far. - vector = gdal.OpenEx(args['shelf_contour_vector_path'], gdal.OF_VECTOR) - layer = vector.GetLayer() - # linestring codes: - # https://github.com/OSGeo/gdal/blob/master/gdal/ogr/ogr_core.h - if layer.GetGeomType() not in [ - ogr.wkbLineString, ogr.wkbMultiLineString, - ogr.wkbLineStringM, ogr.wkbMultiLineStringM, - ogr.wkbLineStringZM, ogr.wkbMultiLineStringZM, - ogr.wkbLineString25D, ogr.wkbMultiLineString25D]: - validation_warnings.append( - (['shelf_contour_vector_path'], POLYLINE_VECTOR_MSG)) - - if ('slr_vector_path' in sufficient_keys and - 'slr_vector_path' not in invalid_keys): - vector = gdal.OpenEx(args['slr_vector_path'], gdal.OF_VECTOR) - layer = vector.GetLayer() - # all OGR point/multipoint codes - if layer.GetGeomType() not in [ - ogr.wkbPoint, ogr.wkbMultiPoint, - ogr.wkbPointM, ogr.wkbMultiPointM, - ogr.wkbPointZM, ogr.wkbMultiPointZM, - ogr.wkbPoint25D, ogr.wkbMultiPoint25D]: - validation_warnings.append( - (['slr_vector_path'], POINT_GEOMETRY_MSG)) - if ('slr_vector_path' not in invalid_keys and 'slr_field' not in invalid_keys and 'slr_vector_path' in sufficient_keys and diff --git a/tests/test_coastal_vulnerability.py b/tests/test_coastal_vulnerability.py index 3e73a57030..ac92b240fc 100644 --- a/tests/test_coastal_vulnerability.py +++ b/tests/test_coastal_vulnerability.py @@ -1050,27 +1050,6 @@ def test_dem_undefined_nodata(self): # It's all negative going in, so should be all 0s out. numpy.testing.assert_array_equal(numpy.zeros_like(array), pos_array) - def test_exception_from_validate_polyline(self): - """CV: raise ValueError on incorrect geometry type during validation. - - shelf_contour_vector_path must be a line geometry, here it's a polygon. - """ - from natcap.invest import coastal_vulnerability - gpkg_driver = ogr.GetDriverByName("GPKG") - shelf_poly_path = os.path.join(self.workspace_dir, 'shelf_poly.gpkg') - vector = gpkg_driver.CreateDataSource(shelf_poly_path) - wgs84_srs = osr.SpatialReference() - wgs84_srs.ImportFromEPSG(4326) - vector.CreateLayer('layer', wgs84_srs, ogr.wkbPolygon) - vector = None - args = CoastalVulnerabilityTests.generate_base_args(self.workspace_dir) - args['shelf_contour_vector_path'] = shelf_poly_path - with self.assertRaises(ValueError): - err_list = coastal_vulnerability.validate(args) - for keys, err_strings in err_list: - if coastal_vulnerability.POLYLINE_VECTOR_MSG in err_strings: - raise ValueError(err_list) - def test_shore_points_on_single_polygon(self): """CV: test shore point creation with single polygon landmass.""" from natcap.invest import coastal_vulnerability @@ -1700,26 +1679,6 @@ def test_missing_keys_sealevelrise(self): 'slr_field']) self.assertEqual(invalid_keys, expected_missing_keys) - def test_message_about_incorrect_geometry(self): - """CV Validate: test catching incorrect shelf contour geometry type. - - shelf_contour_vector_path must be a line geometry, here it's a polygon. - """ - from natcap.invest import coastal_vulnerability - gpkg_driver = ogr.GetDriverByName("GPKG") - shelf_poly_path = os.path.join(self.workspace_dir, 'shelf_poly.gpkg') - vector = gpkg_driver.CreateDataSource(shelf_poly_path) - srs = osr.SpatialReference() - srs.ImportFromEPSG(4326) - vector.CreateLayer('layer', srs, ogr.wkbPolygon) - vector = None - with self.assertRaises(ValueError): - err_list = coastal_vulnerability.validate( - {'shelf_contour_vector_path': shelf_poly_path}) - for keys, err_strings in err_list: - if coastal_vulnerability.POLYLINE_VECTOR_MSG in err_strings: - raise ValueError(err_list) - def test_missing_sealevelrise_field(self): """CV Validate: test catching SLR field not present in vector.""" from natcap.invest import coastal_vulnerability @@ -1735,24 +1694,6 @@ def test_missing_sealevelrise_field(self): validation.MESSAGES['INVALID_OPTION'].format(option_list=['Trend'])) self.assertTrue(expected_err in err_list) - def test_check_slr_geometry_type(self): - """CV Validate: test catching non-point geometries in SLR input""" - from natcap.invest import coastal_vulnerability - gpkg_driver = ogr.GetDriverByName("GPKG") - # create a non-point/multipoint geometry - polygon_path = os.path.join(self.workspace_dir, 'slr_poly.gpkg') - polygon_vector = gpkg_driver.CreateDataSource(polygon_path) - srs = osr.SpatialReference() - srs.ImportFromEPSG(4326) - layer = polygon_vector.CreateLayer('layer', srs, ogr.wkbPolygon) - layer.CreateField(ogr.FieldDefn('id', ogr.OFTInteger64)) - layer, polygon_vector = None, None - - args = {'slr_vector_path': polygon_path} - warning_messages = coastal_vulnerability.validate(args) - msg = (['slr_vector_path'], coastal_vulnerability.POINT_GEOMETRY_MSG) - self.assertTrue(msg in warning_messages) - def make_slr_vector(slr_point_vector_path, fieldname, shapely_feature, srs): """Create an SLR vector with a single point feature. diff --git a/tests/test_wave_energy.py b/tests/test_wave_energy.py index e95da02f94..99e85a9a1a 100644 --- a/tests/test_wave_energy.py +++ b/tests/test_wave_energy.py @@ -678,34 +678,3 @@ def test_validate_keys_missing_values(self): expected_error = ( ['dem_path', 'wave_base_data_path'], validation.MESSAGES['MISSING_VALUE']) self.assertTrue(expected_error in validation_error_list) - - def test_validate_bad_aoi_incorrect_proj_units(self): - """WaveEnergy: test validating AOI vector with incorrect units.""" - from natcap.invest import wave_energy, validation - - args = {} - # Validation will recognize the units "foot" and say it's incorrect - args['aoi_path'] = os.path.join(SAMPLE_DATA, - 'bad_AOI_us_survey_foot.shp') - validation_error_list = wave_energy.validate(args) - expected_error = ( - ['aoi_path'], - validation.MESSAGES['WRONG_PROJECTION_UNIT'].format( - unit_a='meter', unit_b='us_survey_foot')) - self.assertTrue(expected_error in validation_error_list) - - def test_validate_bad_aoi_unrecognized_proj_units(self): - """WaveEnergy: test validating AOI vector with unrecognized units""" - from natcap.invest import wave_energy, validation - - args = {} - # The unit "not_a_unit" is not recognized by pint - args['aoi_path'] = os.path.join( - SAMPLE_DATA, 'bad_AOI_fake_unit.shp') - - validation_error_list = wave_energy.validate(args) - expected_error = ( - ['aoi_path'], - validation.MESSAGES['WRONG_PROJECTION_UNIT'].format( - unit_a='meter', unit_b='not_a_unit')) - self.assertTrue(expected_error in validation_error_list) From 627082841d3ee663019882497d506a8704e652f7 Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Wed, 18 Oct 2023 10:00:47 -0700 Subject: [PATCH 3/5] add comment and history note #1374 --- HISTORY.rst | 2 ++ src/natcap/invest/validation.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 59523f494b..8970af4222 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -42,6 +42,8 @@ Unreleased Changes have been replaced with ``numpy.prod``. https://github.com/natcap/invest/issues/1410 * Add support for python 3.11 (`#1103 `_) + * Vector geometry types will now be validated for all models + (`#1374 `_) * NDR * Fixing an issue where minor geometric issues in the watersheds input (such as a ring self-intersection) would raise an error in the model. diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 403664b5c1..676824c226 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -389,6 +389,11 @@ def check_vector(filepath, geometries, fields=None, projected=False, if gdal_dataset is None: return MESSAGES['NOT_GDAL_VECTOR'] + # NOTE: this only checks the layer geometry type, not the types of the actual + # geometries (layer.GetGeometryTypes()). This is probably equivalent in most + # cases, and it's more efficient than checking every geometry, but we might + # need to change this in the future if it becomes a problem. Currently not + # supporting ogr.wkbUnknown which allows mixed types. layer = gdal_dataset.GetLayer() if layer.GetGeomType() not in allowed_geom_types: return MESSAGES['WRONG_GEOM_TYPE'].format(allowed=geometries) From d0f7d5b955ecc4bab655a80d3274bfb8d02fa2ec Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Wed, 18 Oct 2023 10:04:40 -0700 Subject: [PATCH 4/5] update test data commit hash #1374 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 589b97066e..39514077a0 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ GIT_SAMPLE_DATA_REPO_REV := 2e7cd618c661ec3f3b2a3bddfd2ce7d4704abc05 GIT_TEST_DATA_REPO := https://bitbucket.org/natcap/invest-test-data.git GIT_TEST_DATA_REPO_PATH := $(DATA_DIR)/invest-test-data -GIT_TEST_DATA_REPO_REV := f15ac3e0a17c446e5a4bec4ccd50ce8cec359595 +GIT_TEST_DATA_REPO_REV := da013683e80ea094fbb2309197e2488c02794da8 GIT_UG_REPO := https://github.com/natcap/invest.users-guide GIT_UG_REPO_PATH := doc/users-guide From de37cd81598e36e1d288c0b489dffd69d8c2299a Mon Sep 17 00:00:00 2001 From: Emily Soth Date: Mon, 23 Oct 2023 12:32:23 -0700 Subject: [PATCH 5/5] add geometries param to check_vector docstring --- src/natcap/invest/validation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index 676824c226..b98e193de8 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -348,6 +348,9 @@ def check_vector(filepath, geometries, fields=None, projected=False, Args: filepath (string): The path to the vector on disk. The file must exist and be readable. + geometries (set): Set of geometry type(s) that are allowed. Options are + 'POINT', 'LINESTRING', 'POLYGON', 'MULTIPOINT', 'MULTILINESTRING', + and 'MULTIPOLYGON'. fields=None (dict): A dictionary spec of field names that the vector is expected to have. See the docstring of ``check_headers`` for details on validation rules.