Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate vector geometry types #1430

Merged
merged 6 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ Unreleased Changes
have been replaced with ``numpy.prod``.
https://github.com/natcap/invest/issues/1410
* Add support for python 3.11 (`#1103 <https://github.com/natcap/invest/issues/1103>`_)
* Vector geometry types will now be validated for all models
dcdenu4 marked this conversation as resolved.
Show resolved Hide resolved
(`#1374 <https://github.com/natcap/invest/issues/1374>`_)
* Datastack archives will now be correctly extracted
(`#1308 <https://github.com/natcap/invest/issues/1308>`_)
* NDR
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ GIT_SAMPLE_DATA_REPO_REV := 2e7cd618c661ec3f3b2a3bddfd2ce7d4704abc05

GIT_TEST_DATA_REPO := https://bitbucket.org/natcap/invest-test-data.git
GIT_TEST_DATA_REPO_PATH := $(DATA_DIR)/invest-test-data
GIT_TEST_DATA_REPO_REV := f15ac3e0a17c446e5a4bec4ccd50ce8cec359595
GIT_TEST_DATA_REPO_REV := da013683e80ea094fbb2309197e2488c02794da8

GIT_UG_REPO := https://github.com/natcap/invest.users-guide
GIT_UG_REPO_PATH := doc/users-guide
Expand Down
28 changes: 0 additions & 28 deletions src/natcap/invest/coastal_vulnerability.py
Original file line number Diff line number Diff line change
Expand Up @@ -3507,34 +3507,6 @@ def validate(args, limit_to=None):
invalid_keys = validation.get_invalid_keys(validation_warnings)
sufficient_keys = validation.get_sufficient_keys(args)

if 'shelf_contour_vector_path' not in invalid_keys:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now redundant to validation.validate.

# We can assume that this vector can already be opened because it
# passed validation so far.
vector = gdal.OpenEx(args['shelf_contour_vector_path'], gdal.OF_VECTOR)
layer = vector.GetLayer()
# linestring codes:
# https://github.com/OSGeo/gdal/blob/master/gdal/ogr/ogr_core.h
if layer.GetGeomType() not in [
ogr.wkbLineString, ogr.wkbMultiLineString,
ogr.wkbLineStringM, ogr.wkbMultiLineStringM,
ogr.wkbLineStringZM, ogr.wkbMultiLineStringZM,
ogr.wkbLineString25D, ogr.wkbMultiLineString25D]:
validation_warnings.append(
(['shelf_contour_vector_path'], POLYLINE_VECTOR_MSG))

if ('slr_vector_path' in sufficient_keys and
'slr_vector_path' not in invalid_keys):
vector = gdal.OpenEx(args['slr_vector_path'], gdal.OF_VECTOR)
layer = vector.GetLayer()
# all OGR point/multipoint codes
if layer.GetGeomType() not in [
ogr.wkbPoint, ogr.wkbMultiPoint,
ogr.wkbPointM, ogr.wkbMultiPointM,
ogr.wkbPointZM, ogr.wkbMultiPointZM,
ogr.wkbPoint25D, ogr.wkbMultiPoint25D]:
validation_warnings.append(
(['slr_vector_path'], POINT_GEOMETRY_MSG))

if ('slr_vector_path' not in invalid_keys and
'slr_field' not in invalid_keys and
'slr_vector_path' in sufficient_keys and
Expand Down
36 changes: 33 additions & 3 deletions src/natcap/invest/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import pygeoprocessing
from osgeo import gdal
from osgeo import osr
from osgeo import ogr

from . import gettext
from . import spec_utils
Expand Down Expand Up @@ -58,6 +59,7 @@
'BBOX_NOT_INTERSECT': gettext('Not all of the spatial layers overlap each '
'other. All bounding boxes must intersect: {bboxes}'),
'NEED_PERMISSION': gettext('You must have {permission} access to this file'),
'WRONG_GEOM_TYPE': gettext('Geometry type must be one of {allowed}')
}


Expand Down Expand Up @@ -335,8 +337,8 @@ def load_fields_from_vector(filepath, layer_id=0):
return fieldnames


def check_vector(filepath, fields=None, projected=False, projection_units=None,
**kwargs):
def check_vector(filepath, geometries, fields=None, projected=False,
dcdenu4 marked this conversation as resolved.
Show resolved Hide resolved
projection_units=None, **kwargs):
"""Validate a GDAL vector on disk.

Note:
Expand All @@ -346,6 +348,9 @@ def check_vector(filepath, fields=None, projected=False, projection_units=None,
Args:
filepath (string): The path to the vector on disk. The file must exist
and be readable.
geometries (set): Set of geometry type(s) that are allowed. Options are
'POINT', 'LINESTRING', 'POLYGON', 'MULTIPOINT', 'MULTILINESTRING',
and 'MULTIPOLYGON'.
fields=None (dict): A dictionary spec of field names that the vector is
expected to have. See the docstring of ``check_headers`` for
details on validation rules.
Expand All @@ -366,11 +371,35 @@ def check_vector(filepath, fields=None, projected=False, projection_units=None,
gdal_dataset = gdal.OpenEx(filepath, gdal.OF_VECTOR)
gdal.PopErrorHandler()

geom_map = {
'POINT': [ogr.wkbPoint, ogr.wkbPointM, ogr.wkbPointZM, ogr.wkbPoint25D],
'LINESTRING': [ogr.wkbLineString, ogr.wkbLineStringM,
ogr.wkbLineStringZM, ogr.wkbLineString25D],
'POLYGON': [ogr.wkbPolygon, ogr.wkbPolygonM,
ogr.wkbPolygonZM, ogr.wkbPolygon25D],
'MULTIPOINT': [ogr.wkbMultiPoint, ogr.wkbMultiPointM,
ogr.wkbMultiPointZM, ogr.wkbMultiPoint25D],
'MULTILINESTRING': [ogr.wkbMultiLineString, ogr.wkbMultiLineStringM,
ogr.wkbMultiLineStringZM, ogr.wkbMultiLineString25D],
'MULTIPOLYGON': [ogr.wkbMultiPolygon, ogr.wkbMultiPolygonM,
ogr.wkbMultiPolygonZM, ogr.wkbMultiPolygon25D]
}

allowed_geom_types = []
for geom in geometries:
allowed_geom_types += geom_map[geom]

if gdal_dataset is None:
return MESSAGES['NOT_GDAL_VECTOR']

# NOTE: this only checks the layer geometry type, not the types of the actual
# geometries (layer.GetGeometryTypes()). This is probably equivalent in most
# cases, and it's more efficient than checking every geometry, but we might
# need to change this in the future if it becomes a problem. Currently not
# supporting ogr.wkbUnknown which allows mixed types.
layer = gdal_dataset.GetLayer()
srs = layer.GetSpatialRef()
if layer.GetGeomType() not in allowed_geom_types:
return MESSAGES['WRONG_GEOM_TYPE'].format(allowed=geometries)

if fields:
field_patterns = get_headers_to_validate(fields)
Expand All @@ -380,6 +409,7 @@ def check_vector(filepath, fields=None, projected=False, projection_units=None,
if required_field_warning:
return required_field_warning

srs = layer.GetSpatialRef()
projection_warning = _check_projection(srs, projected, projection_units)
return projection_warning

Expand Down
59 changes: 0 additions & 59 deletions tests/test_coastal_vulnerability.py
Original file line number Diff line number Diff line change
Expand Up @@ -1050,27 +1050,6 @@ def test_dem_undefined_nodata(self):
# It's all negative going in, so should be all 0s out.
numpy.testing.assert_array_equal(numpy.zeros_like(array), pos_array)

def test_exception_from_validate_polyline(self):
"""CV: raise ValueError on incorrect geometry type during validation.

shelf_contour_vector_path must be a line geometry, here it's a polygon.
"""
from natcap.invest import coastal_vulnerability
gpkg_driver = ogr.GetDriverByName("GPKG")
shelf_poly_path = os.path.join(self.workspace_dir, 'shelf_poly.gpkg')
vector = gpkg_driver.CreateDataSource(shelf_poly_path)
wgs84_srs = osr.SpatialReference()
wgs84_srs.ImportFromEPSG(4326)
vector.CreateLayer('layer', wgs84_srs, ogr.wkbPolygon)
vector = None
args = CoastalVulnerabilityTests.generate_base_args(self.workspace_dir)
args['shelf_contour_vector_path'] = shelf_poly_path
with self.assertRaises(ValueError):
err_list = coastal_vulnerability.validate(args)
for keys, err_strings in err_list:
if coastal_vulnerability.POLYLINE_VECTOR_MSG in err_strings:
raise ValueError(err_list)

def test_shore_points_on_single_polygon(self):
"""CV: test shore point creation with single polygon landmass."""
from natcap.invest import coastal_vulnerability
Expand Down Expand Up @@ -1700,26 +1679,6 @@ def test_missing_keys_sealevelrise(self):
'slr_field'])
self.assertEqual(invalid_keys, expected_missing_keys)

def test_message_about_incorrect_geometry(self):
"""CV Validate: test catching incorrect shelf contour geometry type.

shelf_contour_vector_path must be a line geometry, here it's a polygon.
"""
from natcap.invest import coastal_vulnerability
gpkg_driver = ogr.GetDriverByName("GPKG")
shelf_poly_path = os.path.join(self.workspace_dir, 'shelf_poly.gpkg')
vector = gpkg_driver.CreateDataSource(shelf_poly_path)
srs = osr.SpatialReference()
srs.ImportFromEPSG(4326)
vector.CreateLayer('layer', srs, ogr.wkbPolygon)
vector = None
with self.assertRaises(ValueError):
err_list = coastal_vulnerability.validate(
{'shelf_contour_vector_path': shelf_poly_path})
for keys, err_strings in err_list:
if coastal_vulnerability.POLYLINE_VECTOR_MSG in err_strings:
raise ValueError(err_list)

def test_missing_sealevelrise_field(self):
"""CV Validate: test catching SLR field not present in vector."""
from natcap.invest import coastal_vulnerability
Expand All @@ -1735,24 +1694,6 @@ def test_missing_sealevelrise_field(self):
validation.MESSAGES['INVALID_OPTION'].format(option_list=['Trend']))
self.assertTrue(expected_err in err_list)

def test_check_slr_geometry_type(self):
"""CV Validate: test catching non-point geometries in SLR input"""
from natcap.invest import coastal_vulnerability
gpkg_driver = ogr.GetDriverByName("GPKG")
# create a non-point/multipoint geometry
polygon_path = os.path.join(self.workspace_dir, 'slr_poly.gpkg')
polygon_vector = gpkg_driver.CreateDataSource(polygon_path)
srs = osr.SpatialReference()
srs.ImportFromEPSG(4326)
layer = polygon_vector.CreateLayer('layer', srs, ogr.wkbPolygon)
layer.CreateField(ogr.FieldDefn('id', ogr.OFTInteger64))
layer, polygon_vector = None, None

args = {'slr_vector_path': polygon_path}
warning_messages = coastal_vulnerability.validate(args)
msg = (['slr_vector_path'], coastal_vulnerability.POINT_GEOMETRY_MSG)
self.assertTrue(msg in warning_messages)


def make_slr_vector(slr_point_vector_path, fieldname, shapely_feature, srs):
"""Create an SLR vector with a single point feature.
Expand Down
39 changes: 31 additions & 8 deletions tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ def test_file_not_found(self):
from natcap.invest import validation

filepath = os.path.join(self.workspace_dir, 'file.txt')
error_msg = validation.check_vector(filepath)
error_msg = validation.check_vector(filepath, geometries={'POINT'})
self.assertEqual(error_msg, validation.MESSAGES['FILE_NOT_FOUND'])

def test_invalid_vector(self):
Expand All @@ -469,7 +469,7 @@ def test_invalid_vector(self):
with open(filepath, 'w') as bad_vector:
bad_vector.write('not a vector')

error_msg = validation.check_vector(filepath)
error_msg = validation.check_vector(filepath, geometries={'POINT'})
self.assertEqual(error_msg, validation.MESSAGES['NOT_GDAL_VECTOR'])

def test_missing_fieldnames(self):
Expand All @@ -481,7 +481,7 @@ def test_missing_fieldnames(self):
vector = driver.Create(filepath, 0, 0, 0, gdal.GDT_Unknown)
wgs84_srs = osr.SpatialReference()
wgs84_srs.ImportFromEPSG(4326)
layer = vector.CreateLayer('sample_layer', wgs84_srs, ogr.wkbUnknown)
layer = vector.CreateLayer('sample_layer', wgs84_srs, ogr.wkbPoint)

for field_name, field_type in (('COL_A', ogr.OFTInteger),
('col_b', ogr.OFTString)):
Expand All @@ -497,7 +497,8 @@ def test_missing_fieldnames(self):
vector = None

error_msg = validation.check_vector(
filepath, fields={'col_a': {}, 'col_b': {}, 'col_c': {}})
filepath, geometries={'POINT'},
fields={'col_a': {}, 'col_b': {}, 'col_c': {}})
expected = validation.MESSAGES['MATCHED_NO_HEADERS'].format(
header='field', header_name='col_c')
self.assertEqual(error_msg, expected)
Expand All @@ -511,19 +512,39 @@ def test_vector_projected_in_m(self):
vector = driver.Create(filepath, 0, 0, 0, gdal.GDT_Unknown)
meters_srs = osr.SpatialReference()
meters_srs.ImportFromEPSG(32731)
layer = vector.CreateLayer('sample_layer', meters_srs, ogr.wkbUnknown)
layer = vector.CreateLayer('sample_layer', meters_srs, ogr.wkbPoint)

layer = None
vector = None

error_msg = validation.check_vector(
filepath, projected=True, projection_units=spec_utils.u.foot)
filepath, geometries={'POINT'}, projected=True,
projection_units=spec_utils.u.foot)
expected_msg = validation.MESSAGES['WRONG_PROJECTION_UNIT'].format(
unit_a='foot', unit_b='metre')
self.assertEqual(error_msg, expected_msg)

self.assertEqual(None, validation.check_vector(
filepath, projected=True, projection_units=spec_utils.u.meter))
filepath, geometries={'POINT'}, projected=True,
projection_units=spec_utils.u.meter))

def test_wrong_geom_type(self):
"""Validation: checks that the vector's geometry type is correct."""
from natcap.invest import spec_utils, validation
driver = gdal.GetDriverByName('GPKG')
filepath = os.path.join(self.workspace_dir, 'vector.gpkg')
vector = driver.Create(filepath, 0, 0, 0, gdal.GDT_Unknown)
meters_srs = osr.SpatialReference()
meters_srs.ImportFromEPSG(32731)
layer = vector.CreateLayer('sample_layer', meters_srs, ogr.wkbPoint)
layer = None
vector = None
self.assertEqual(
validation.check_vector(filepath, geometries={'POLYGON', 'POINT'}),
None)
self.assertEqual(
validation.check_vector(filepath, geometries={'MULTIPOINT'}),
validation.MESSAGES['WRONG_GEOM_TYPE'].format(allowed={'MULTIPOINT'}))


class FreestyleStringValidation(unittest.TestCase):
Expand Down Expand Up @@ -1172,7 +1193,8 @@ def test_spatial_overlap_error(self):
'name': 'vector 1',
'about': 'vector 1',
'required': True,
'fields': {}
'fields': {},
'geometries': {'POINT'}
}
}

Expand Down Expand Up @@ -1287,6 +1309,7 @@ def test_spatial_overlap_error_optional_args(self):
'name': 'vector 1',
'about': 'vector 1',
'required': False,
'geometries': {'POINT'}
}
}

Expand Down
31 changes: 0 additions & 31 deletions tests/test_wave_energy.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,34 +678,3 @@ def test_validate_keys_missing_values(self):
expected_error = (
['dem_path', 'wave_base_data_path'], validation.MESSAGES['MISSING_VALUE'])
self.assertTrue(expected_error in validation_error_list)

def test_validate_bad_aoi_incorrect_proj_units(self):
"""WaveEnergy: test validating AOI vector with incorrect units."""
from natcap.invest import wave_energy, validation

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests aren't specific to wave energy, and the same thing is covered in the validation tests. I removed them because the sample data vectors they use have the wrong geometry type, which now breaks.

args = {}
# Validation will recognize the units "foot" and say it's incorrect
args['aoi_path'] = os.path.join(SAMPLE_DATA,
'bad_AOI_us_survey_foot.shp')
validation_error_list = wave_energy.validate(args)
expected_error = (
['aoi_path'],
validation.MESSAGES['WRONG_PROJECTION_UNIT'].format(
unit_a='meter', unit_b='us_survey_foot'))
self.assertTrue(expected_error in validation_error_list)

def test_validate_bad_aoi_unrecognized_proj_units(self):
"""WaveEnergy: test validating AOI vector with unrecognized units"""
from natcap.invest import wave_energy, validation

args = {}
# The unit "not_a_unit" is not recognized by pint
args['aoi_path'] = os.path.join(
SAMPLE_DATA, 'bad_AOI_fake_unit.shp')

validation_error_list = wave_energy.validate(args)
expected_error = (
['aoi_path'],
validation.MESSAGES['WRONG_PROJECTION_UNIT'].format(
unit_a='meter', unit_b='not_a_unit'))
self.assertTrue(expected_error in validation_error_list)
Loading