From 2d5deff304696fd8fff061d8fd3becb810e27e9d Mon Sep 17 00:00:00 2001
From: Craig Arthur
Date: Wed, 14 Feb 2024 11:26:41 +1100
Subject: [PATCH] #145 replace shapefile with pyshp module

---
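The crux of this patch is pyshp's 2.x calling convention: the output path
moves into the Writer constructor and close() replaces save(). A minimal
sketch of the new workflow, assuming pyshp >= 2.0; the file name and field
definitions below are invented for illustration, not taken from TCRM:

    import shapefile  # pyshp >= 2.0

    # pyshp 1.x:  w = shapefile.Writer(shapefile.POINT); ...; w.save(path)
    # pyshp 2.x:  the target path goes to the constructor, and close()
    # finalises the .shp/.shx/.dbf triplet on disk.
    w = shapefile.Writer("example_points", shapeType=shapefile.POINT)
    w.field("TCID", "N", 6, 0)    # name, type, width, precision
    w.field("Pcentre", "N", 7, 1)
    w.point(153.0, -27.5)         # longitude, latitude
    w.record(1, 990.0)            # one record per shape
    w.close()                     # writes example_points.{shp,shx,dbf}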
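The dissolve branch of tracks2line() keeps its dateline handling: a jump of
less than -180 degrees between consecutive longitudes marks a crossing of
the 180E meridian, and np.argmin locates it so the track is split into two
polyline parts. A toy rendering of that slicing, with invented coordinates,
assuming dlon holds np.diff of the longitudes as in the function body:

    import numpy as np

    lons = np.array([178.0, 179.5, -179.5, -178.0])  # eastward across 180E
    lats = np.array([-15.0, -15.5, -16.0, -16.5])

    dlon = np.diff(lons)
    if dlon.min() < -180:
        idx = np.argmin(dlon)  # segment where the longitude wraps
        parts = [list(zip(lons[:idx], lats[:idx])),
                 list(zip(lons[idx + 1:], lats[idx + 1:]))]
    else:
        parts = [list(zip(lons, lats))]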
 Utilities/shapefile.py  | 1186 ---------------------------------------
 Utilities/shptools.py   |  197 ++++---
 Utilities/tracks2shp.py |  230 +++++---
 3 files changed, 273 insertions(+), 1340 deletions(-)
 delete mode 100755 Utilities/shapefile.py

diff --git a/Utilities/shapefile.py b/Utilities/shapefile.py
deleted file mode 100755
index 9fed7a9b..00000000
--- a/Utilities/shapefile.py
+++ /dev/null
@@ -1,1186 +0,0 @@
[1,186 deleted lines not reproduced: this hunk removes the entire vendored
copy of shapefile.py (pyshp 1.2.0, J. Lawhead, 2013-07-27, "Provides read
and write support for ESRI Shapefiles"); the extracted text of the deletion
body was collapsed and truncated beyond recovery]

diff --git a/Utilities/shptools.py b/Utilities/shptools.py
index daa6054c..32c6bb0c 100644
--- a/Utilities/shptools.py
+++ b/Utilities/shptools.py
@@ -3,8 +3,8 @@
 ===============================================================
 .. module: shptools
-    :synopsis: A collection of useful functions to manipulate shapefiles. Uses the `shapefile
-    library `_
+    :synopsis: A collection of useful functions to manipulate shapefiles.
+    Uses the `pyshp library `_
@@ -12,7 +12,7 @@

 import logging

-from . import shapefile
+import shapefile

 import numpy as np
@@ -21,32 +21,55 @@

 # For all observation points/line segments:
-OBSFIELD_NAMES = ('Indicator', 'TCID', 'Year', 'Month',
-                  'Day', 'Hour', 'Minute', 'TimeElapsed', 'Longitude',
-                  'Latitude', 'Speed', 'Bearing', 'CentralPressure',
-                  'WindSpeed', 'rMax', 'EnvPressure')
-OBSFIELD_TYPES = ('N',)*16
+OBSFIELD_NAMES = (
+    "Indicator",
+    "TCID",
+    "Year",
+    "Month",
+    "Day",
+    "Hour",
+    "Minute",
+    "TimeElapsed",
+    "Longitude",
+    "Latitude",
+    "Speed",
+    "Bearing",
+    "CentralPressure",
+    "WindSpeed",
+    "rMax",
+    "EnvPressure",
+)
+OBSFIELD_TYPES = ("N",) * 16
 OBSFIELD_WIDTH = (1, 6, 4, 2, 2, 2, 2, 6, 7, 7, 6, 6, 7, 6, 6, 7)
-OBSFIELD_PREC = (0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 1)
+OBSFIELD_PREC = (0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 1)

-OBSFIELDS = [[n, t, w, p] for n, t, w, p in zip(OBSFIELD_NAMES,
-                                                OBSFIELD_TYPES,
-                                                OBSFIELD_WIDTH,
-                                                OBSFIELD_PREC)]
+OBSFIELDS = [
+    [n, t, w, p]
+    for n, t, w, p in zip(OBSFIELD_NAMES, OBSFIELD_TYPES, OBSFIELD_WIDTH, OBSFIELD_PREC)
+]

 # For storing events as a single polyline:
-EVENTFIELD_NAMES = ('TCID', 'Year', 'Month', 'Day', 'Hour', 'Minute', 'Age',
-                    'MinPressure', 'MaxWindSpeed' )
-EVENTFIELD_TYPES = ('N',)*9
+EVENTFIELD_NAMES = (
+    "TCID",
+    "Year",
+    "Month",
+    "Day",
+    "Hour",
+    "Minute",
+    "Age",
+    "MinPressure",
+    "MaxWindSpeed",
+)
+EVENTFIELD_TYPES = ("N",) * 9
 EVENTFIELD_WIDTH = (6, 4, 2, 2, 2, 2, 6, 7, 7)
-EVENTFIELD_PREC = (0, 0, 0, 0, 0, 0, 2, 2, 1)
-
-EVENTFIELDS = [[n, t, w, p] for n, t, w, p in zip(EVENTFIELD_NAMES,
-                                                  EVENTFIELD_TYPES,
-                                                  EVENTFIELD_WIDTH,
-                                                  EVENTFIELD_PREC)]
-
+EVENTFIELD_PREC = (0, 0, 0, 0, 0, 0, 2, 2, 1)
+EVENTFIELDS = [
+    [n, t, w, p]
+    for n, t, w, p in zip(
+        EVENTFIELD_NAMES, EVENTFIELD_TYPES, EVENTFIELD_WIDTH, EVENTFIELD_PREC
+    )
+]

 def parseData(data):
@@ -66,23 +89,23 @@ def parseData(data):
     fields = []
     records = []
     for k in list(data.keys()):
-        t = data[k]['Type']
-        l = data[k]['Length']
+        t = data[k]["Type"]
+        l = data[k]["Length"]
         if t == 0:
-            nt = 'C'
+            nt = "C"
             p = 0
         elif t == 1:
-            nt = 'N'
+            nt = "N"
             p = 0
         elif t == 2:
-            nt = 'N'
-            p = data[k]['Precision']
+            nt = "N"
+            p = data[k]["Precision"]
         else:
             nt = t
             p = 0
         fields.append([k, nt, l, p])
-        records.append([data[k]['Data']])
+        records.append([data[k]["Data"]])

     return fields, records
@@ -114,7 +137,7 @@ def shpCreateFile(outputFile, shapes, data, shpType):
     fields, records = parseData(data)

     log.info("Writing data to {0}.shp".format(outputFile))
-    w = shapefile.Writer(shpType)
+    w = shapefile.Writer(outputFile, shpType)
     # Set the fields:
     for f in fields:
         w.field(*f)
@@ -125,7 +148,7 @@ def shpCreateFile(outputFile, shapes, data, shpType):
             start = 0
             new_parts = []
             for p in s.parts[1:]:
-                new_parts.append(list(s.points[start:p - 1]))
+                new_parts.append(list(s.points[start : p - 1]))
                 start = p
             new_parts.append(list(s.points[p:-1]))
             w.poly(parts=new_parts)
@@ -134,12 +157,13 @@ def shpCreateFile(outputFile, shapes, data, shpType):
         w.record(*r)

     try:
-        w.save(outputFile)
+        w.close()
     except shapefile.ShapefileException:
-        log.exception("Unable to save data to {0}".format(outputFile) )
+        log.exception("Unable to save data to {0}".format(outputFile))

     return

+
 def shpSaveTrackFile(outputFile, tracks, fmt="point"):
     """
     Save track data to shapefile. The fields are sorted using the same
@@ -164,12 +188,15 @@ def shpSaveTrackFile(outputFile, tracks, fmt="point"):
     elif fmt == "segments":
         tracks2line(tracks, outputFile, dissolve=False)
     else:
-        log.critical(("Unknown output format - must be one of 'points', "
-                      "'lines' or 'segments'"))
+        log.critical(
+            (
+                "Unknown output format - must be one of 'points', "
+                "'lines' or 'segments'"
+            )
+        )
     return

-
 def shpWriteShapeFile(outputFile, shpType, fields, shapes, records):
     """
     Save data to a shapefile. The fields are sorted using the same
@@ -191,7 +218,7 @@ def shpWriteShapeFile(outputFile, shpType, fields, shapes, records):
     """
     log.info("Writing data to {0}.shp".format(outputFile))

-    w = shapefile.Writer(shpType)
+    w = shapefile.Writer(outputFile, shpType)

     # Set the fields:
     for f in fields:
@@ -203,7 +230,7 @@ def shpWriteShapeFile(outputFile, shpType, fields, shapes, records):
             start = 0
             new_parts = []
             for p in shp.parts[1:]:
-                new_parts.append(list(shp.points[start:p-1]))
+                new_parts.append(list(shp.points[start : p - 1]))
                 start = p
             new_parts.append(list(shp.points[p:-1]))
             w.poly(parts=new_parts)
@@ -212,12 +239,13 @@ def shpWriteShapeFile(outputFile, shpType, fields, shapes, records):
         w.record(*rec)

     try:
-        w.save(outputFile)
+        w.close()
     except shapefile.ShapefileException:
-        log.exception("Unable to save data to {0}".format(outputFile) )
+        log.exception("Unable to save data to {0}".format(outputFile))

     return

+
 def shpGetVertices(shape_file, key_name=None):
     """
     Returns a dictionary of arrays containing coordinate pairs
@@ -262,7 +290,7 @@ def shpGetVertices(shape_file, key_name=None):
         raise

     else:
-        fields = sf.fields[1:] # Drop first field (DeletionFlag)
+        fields = sf.fields[1:]  # Drop first field (DeletionFlag)
         field_names = [fields[i][0] for i in range(len(fields))]

         if key_name and (key_name in field_names):
@@ -279,6 +307,7 @@ def shpGetVertices(shape_file, key_name=None):

     return vertices

+
 def shpGetField(shape_file, field_name, dtype=float):
     """
     Extract from the records the value of the field corresponding
@@ -310,12 +339,11 @@ def shpGetField(shape_file, field_name, dtype=float):
         raise

     else:
-        fields = sf.fields[1:] # Drop first field (DeletionFlag)
+        fields = sf.fields[1:]  # Drop first field (DeletionFlag)
         field_names = [fields[i][0] for i in range(len(fields))]

     if field_name not in field_names:
-        log.warning("No field '{0}' in the list of fieldnames" .
-                    format(field_name))
+        log.warning("No field '{0}' in the list of fieldnames".format(field_name))
         log.warning("Unable to proceed with processing")
         raise ValueError
@@ -327,16 +355,15 @@ def shpGetField(shape_file, field_name, dtype=float):

     if dtype != str:
         # For non-string data, return a numpy array:
-        output = np.array([records[rec][idx] for rec in range(nrecords)],
-                          dtype=dtype)
+        output = np.array([records[rec][idx] for rec in range(nrecords)], dtype=dtype)

     else:
         # Otherwise, return a list:
         output = [records[rec][idx] for rec in range(nrecords)]

-
     return output

+
 def shpReadShapeFile(shape_file):
     """
     Return the vertices and records for the given shape file
@@ -351,7 +378,7 @@ def shpReadShapeFile(shape_file):
     """

     try:
-        sf = shapefile.Reader(shape_file,"rb")
+        sf = shapefile.Reader(shape_file, "rb")
     except shapefile.ShapefileException:
         log.exception("Cannot read {0}".format(shape_file))
         raise
@@ -362,6 +389,7 @@ def shpReadShapeFile(shape_file):

     return vertices, records

+
 def tracks2point(tracks, outputFile):
     """
     Writes tracks to a shapefile as a collection of point features
@@ -372,7 +400,7 @@ def tracks2point(tracks, outputFile):
     :param str outputFile: Path to output file destination
     """

-    sf = shapefile.Writer(shapefile.POINT)
+    sf = shapefile.Writer(outputFile, shapefile.POINT)
     sf.fields = OBSFIELDS

     for track in tracks:
@@ -381,12 +409,13 @@ def tracks2point(tracks, outputFile):
             sf.record(*rec)

     try:
-        sf.save(outputFile)
+        sf.close()
     except shapefile.ShapefileException:
         raise

     return

+
 def tracks2line(tracks, outputFile, dissolve=False):
     """
     Writes tracks to a shapefile as a collection of line features
@@ -405,7 +434,7 @@ def tracks2line(tracks, outputFile, dissolve=False):
     :param dissolve: Store track features or track segments.
     """

-    sf = shapefile.Writer(shapefile.POLYLINE)
+    sf = shapefile.Writer(outputFile, shapefile.POLYLINE)
     if dissolve:
         sf.fields = EVENTFIELDS
     else:
@@ -420,12 +449,10 @@ def tracks2line(tracks, outputFile, dissolve=False):
                 # into multiple parts:
                 idx = np.argmin(dlon)
                 parts = []
-                lines = zip(track.Longitude[:idx],
-                            track.Latitude[:idx])
+                lines = zip(track.Longitude[:idx], track.Latitude[:idx])
                 parts.append(lines)
-                lines = zip(track.Longitude[idx+1:],
-                            track.Latitude[idx+1:])
+                lines = zip(track.Longitude[idx + 1 :], track.Latitude[idx + 1 :])
                 parts.append(lines)
                 sf.line(parts)
@@ -436,7 +463,6 @@ def tracks2line(tracks, outputFile, dissolve=False):
                 lines = zip(track.Longitude, track.Latitude)
                 sf.line([lines])

-
             minPressure = track.trackMinPressure
             maxWind = track.trackMaxWind
@@ -447,39 +473,62 @@ def tracks2line(tracks, outputFile, dissolve=False):
             startDay = track.Day[0]
             startHour = track.Hour[0]
             startMin = track.Minute[0]
-            record = [track.CycloneNumber[0], startYear, startMonth, startDay,
-                      startHour, startMin, age, minPressure, maxWind]
+            record = [
+                track.CycloneNumber[0],
+                startYear,
+                startMonth,
+                startDay,
+                startHour,
+                startMin,
+                age,
+                minPressure,
+                maxWind,
+            ]
             sf.record(*record)

         else:
             if len(track.data) == 1:
-                line = [[[track.Longitude, track.Latitude],
-                         [track.Longitude, track.Latitude]]]
+                line = [
+                    [
+                        [track.Longitude, track.Latitude],
+                        [track.Longitude, track.Latitude],
+                    ]
+                ]
                 sf.line(line)
                 sf.record(*track.data[0])
             else:
                 for n in range(len(track.data) - 1):
                     dlon = track.Longitude[n + 1] - track.Longitude[n]
-                    if dlon < -180.:
+                    if dlon < -180.0:
                         # case where the track crosses 0E:
-                        segment = [[[track.Longitude[n], track.Latitude[n]],
-                                    [track.Longitude[n], track.Latitude[n]]]]
+                        segment = [
+                            [
+                                [track.Longitude[n], track.Latitude[n]],
+                                [track.Longitude[n], track.Latitude[n]],
+                            ]
+                        ]
                     else:
-                        segment = [[[track.Longitude[n],
-                                     track.Latitude[n]],
-                                    [track.Longitude[n + 1],
-                                     track.Latitude[n + 1]]]]
+                        segment = [
+                            [
+                                [track.Longitude[n], track.Latitude[n]],
+                                [track.Longitude[n + 1], track.Latitude[n + 1]],
+                            ]
+                        ]
                     sf.line(segment)
                     sf.record(*track.data[n])

                 # Last point in the track:
-                sf.line([[[track.Longitude[n + 1],
-                           track.Latitude[n + 1]],
-                          [track.Longitude[n + 1],
-                           track.Latitude[n + 1]]]])
-                sf.record(*track.data[n+1])
+                sf.line(
+                    [
+                        [
+                            [track.Longitude[n + 1], track.Latitude[n + 1]],
+                            [track.Longitude[n + 1], track.Latitude[n + 1]],
+                        ]
+                    ]
+                )
+                sf.record(*track.data[n + 1])

     try:
-        sf.save(outputFile)
+        sf.close()
     except shapefile.ShapefileException:
         raise
diff --git a/Utilities/tracks2shp.py b/Utilities/tracks2shp.py
index c15e03d6..5770fa8e 100644
--- a/Utilities/tracks2shp.py
+++ b/Utilities/tracks2shp.py
@@ -10,7 +10,7 @@

 """

-import Utilities.shapefile as shapefile
+import shapefile

 import numpy as np
 import logging
@@ -18,42 +18,82 @@
 LOG = logging.getLogger(__name__)

 # For all observation points/line segments:
-OBSFIELD_NAMES = ('Indicator', 'TCID', 'Year', 'Month',
-                  'Day', 'Hour', 'Minute', 'TElapsed', 'Longitude',
-                  'Latitude', 'Speed', 'Bearing', 'Pcentre',
-                  'MaxWind', 'rMax', 'Penv', 'Category')
-OBSFIELD_TYPES = ('N',)*17
+OBSFIELD_NAMES = (
+    "Indicator",
+    "TCID",
+    "Year",
+    "Month",
+    "Day",
+    "Hour",
+    "Minute",
+    "TElapsed",
+    "Longitude",
+    "Latitude",
+    "Speed",
+    "Bearing",
+    "Pcentre",
+    "MaxWind",
+    "rMax",
+    "Penv",
+    "Category",
+)
+OBSFIELD_TYPES = ("N",) * 17
 OBSFIELD_WIDTH = (1, 6, 4, 2, 2, 2, 2, 6, 7, 7, 6, 6, 7, 6, 6, 7, 1)
-OBSFIELD_PREC = (0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 1, 0)
-
-OBSFIELDS = [[n, t, w, p] for n, t, w, p in zip(OBSFIELD_NAMES,
-                                                OBSFIELD_TYPES,
-                                                OBSFIELD_WIDTH,
-                                                OBSFIELD_PREC)]
-
-TCRM_FIELD_NAMES = ('CycloneNumber', 'TimeElapsed', 'Longitude', 'Latitude',
-                    'Speed', 'Bearing', 'CentralPressure', 'EnvPressure',
-                    'rMax','Category')
-TCRM_FIELD_TYPES = ('N',) * 10
+OBSFIELD_PREC = (0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 1, 0)
+
+OBSFIELDS = [
+    [n, t, w, p]
+    for n, t, w, p in zip(OBSFIELD_NAMES, OBSFIELD_TYPES,
+                          OBSFIELD_WIDTH, OBSFIELD_PREC)
+]
+
+TCRM_FIELD_NAMES = (
+    "CycloneNumber",
+    "TimeElapsed",
+    "Longitude",
+    "Latitude",
+    "Speed",
+    "Bearing",
+    "CentralPressure",
+    "EnvPressure",
+    "rMax",
+    "Category",
+)
+TCRM_FIELD_TYPES = ("N",) * 10
 TCRM_FIELD_WIDTH = (6, 7, 9, 9, 8, 8, 8, 8, 8, 1)
-TCRM_FIELD_PREC = (0, 2, 4, 4, 4, 4, 3, 3, 4, 0)
+TCRM_FIELD_PREC = (0, 2, 4, 4, 4, 4, 3, 3, 4, 0)

-TCRM_FIELDS = [[n, t, w, p] for n, t, w, p in zip(TCRM_FIELD_NAMES,
-                                                  TCRM_FIELD_TYPES,
-                                                  TCRM_FIELD_WIDTH,
-                                                  TCRM_FIELD_PREC)]
+TCRM_FIELDS = [
+    [n, t, w, p]
+    for n, t, w, p in zip(
+        TCRM_FIELD_NAMES, TCRM_FIELD_TYPES, TCRM_FIELD_WIDTH, TCRM_FIELD_PREC
+    )
+]

 # For storing events as a single polyline:
-EVENTFIELD_NAMES = ('TCID', 'Year', 'Month', 'Day', 'Hour', 'Minute', 'Age',
-                    'MinCP', 'MaxWind' )
-EVENTFIELD_TYPES = ('N',)*9
+EVENTFIELD_NAMES = (
+    "TCID",
+    "Year",
+    "Month",
+    "Day",
+    "Hour",
+    "Minute",
+    "Age",
+    "MinCP",
+    "MaxWind",
+)
+EVENTFIELD_TYPES = ("N",) * 9
 EVENTFIELD_WIDTH = (6, 4, 2, 2, 2, 2, 6, 7, 7)
-EVENTFIELD_PREC = (0, 0, 0, 0, 0, 0, 2, 2, 1)
+EVENTFIELD_PREC = (0, 0, 0, 0, 0, 0, 2, 2, 1)
+
+EVENTFIELDS = [
+    [n, t, w, p]
+    for n, t, w, p in zip(
+        EVENTFIELD_NAMES, EVENTFIELD_TYPES,
+        EVENTFIELD_WIDTH, EVENTFIELD_PREC
+    )
+]

-EVENTFIELDS = [[n, t, w, p] for n, t, w, p in zip(EVENTFIELD_NAMES,
-                                                  EVENTFIELD_TYPES,
-                                                  EVENTFIELD_WIDTH,
-                                                  EVENTFIELD_PREC)]

 def recdropfields(rec, names):
     """
@@ -70,8 +110,9 @@ def recdropfields(rec, names):

     names = set(names)

-    newdtype = np.dtype([(name, rec.dtype[name]) for name in rec.dtype.names
-                         if name not in names])
+    newdtype = np.dtype(
+        [(name, rec.dtype[name]) for name in rec.dtype.names if name not in names]  # noqa
+    )

     newrec = np.recarray(rec.shape, dtype=newdtype)
     for field in newdtype.names:
@@ -97,7 +138,7 @@ def add_category(tracks):
     Add a category field (for central pressure) to the tracks.
     """
     for track in tracks:
-        track.data = add_field(track.data, [('Category', int)])
+        track.data = add_field(track.data, [("Category", int)])

         for rec in track.data:
            if rec["CentralPressure"] < 930:
@@ -129,7 +170,7 @@ def tracks2point(tracks, outputFile, netcdf_format=False):
     """
     LOG.info("Writing point shape file: {0}".format(outputFile))

-    sf = shapefile.Writer(shapefile.POINT)
+    sf = shapefile.Writer(outputFile, shapefile.POINT)
     if netcdf_format:
         sf.fields = TCRM_FIELDS
     else:
@@ -138,19 +179,20 @@ def tracks2point(tracks, outputFile, netcdf_format=False):
     LOG.debug("Processing {0} tracks".format(len(tracks)))

     for track in tracks:
-        track.data = recdropfields(track.data, ['Datetime'])
+        track.data = recdropfields(track.data, ["Datetime"])
         for lon, lat, rec in zip(track.Longitude, track.Latitude, track.data):
             sf.point(lon, lat)
             sf.record(*rec)

     try:
-        sf.save(outputFile)
+        sf.close()
     except shapefile.ShapefileException:
         LOG.exception("Cannot save shape file: {0}".format(outputFile))
         raise

     return

+
 def tracks2line(tracks, outputFile, dissolve=False, netcdf_format=False):
     """
     Writes tracks to a shapefile as a collection of line features
@@ -174,7 +216,7 @@ def tracks2line(tracks, outputFile, dissolve=False, netcdf_format=False):
         when attempting to save the file.
""" LOG.info("Writing line shape file: {0}".format(outputFile)) - sf = shapefile.Writer(shapefile.POLYLINE) + sf = shapefile.Writer(outputFile, shapefile.POLYLINE) if netcdf_format: sf.fields = TCRM_FIELDS elif dissolve: @@ -185,7 +227,7 @@ def tracks2line(tracks, outputFile, dissolve=False, netcdf_format=False): LOG.debug("Processing {0} tracks".format(len(tracks))) for track in tracks: - track.data = recdropfields(track.data, ['Datetime']) + track.data = recdropfields(track.data, ["Datetime"]) if dissolve: if len(track.data) > 1: @@ -195,12 +237,10 @@ def tracks2line(tracks, outputFile, dissolve=False, netcdf_format=False): # into multiple parts: idx = np.argmin(dlon) parts = [] - lines = zip(track.Longitude[:idx], - track.Latitude[:idx]) + lines = zip(track.Longitude[:idx], track.Latitude[:idx]) parts.append(lines) - lines = zip(track.Longitude[idx+1:], - track.Latitude[idx+1:]) + lines = zip(track.Longitude[idx + 1 :], track.Latitude[idx + 1 :]) # noqa parts.append(lines) sf.line(parts) @@ -211,7 +251,6 @@ def tracks2line(tracks, outputFile, dissolve=False, netcdf_format=False): lines = zip(track.Longitude, track.Latitude) sf.line([lines]) - if netcdf_format: sf.record(*track.data[0]) else: @@ -225,45 +264,69 @@ def tracks2line(tracks, outputFile, dissolve=False, netcdf_format=False): startDay = track.Day[0] startHour = track.Hour[0] startMin = track.Minute[0] - record = [track.CycloneNumber[0], startYear, startMonth, startDay, - startHour, startMin, age, minPressure, maxWind] + record = [ + track.CycloneNumber[0], + startYear, + startMonth, + startDay, + startHour, + startMin, + age, + minPressure, + maxWind, + ] sf.record(*record) else: if len(track.data) == 1: - line = [[[track.Longitude, track.Latitude], - [track.Longitude, track.Latitude]]] + line = [ + [ + [track.Longitude, track.Latitude], + [track.Longitude, track.Latitude], + ] + ] sf.line(line) sf.record(*track.data[0]) else: for n in range(len(track.data) - 1): dlon = track.Longitude[n + 1] - track.Longitude[n] - if dlon < -180.: + if dlon < -180.0: # case where the track crosses 0E: - segment = [[[track.Longitude[n], track.Latitude[n]], - [track.Longitude[n], track.Latitude[n]]]] + segment = [ + [ + [track.Longitude[n], track.Latitude[n]], + [track.Longitude[n], track.Latitude[n]], + ] + ] else: - segment = [[[track.Longitude[n], - track.Latitude[n]], - [track.Longitude[n + 1], - track.Latitude[n + 1]]]] + segment = [ + [ + [track.Longitude[n], track.Latitude[n]], + [track.Longitude[n + 1], track.Latitude[n + 1]], # noqa + ] + ] sf.line(segment) sf.record(*track.data[n]) # Last point in the track: - sf.line([[[track.Longitude[n + 1], - track.Latitude[n + 1]], - [track.Longitude[n + 1], - track.Latitude[n + 1]]]]) - sf.record(*track.data[n+1]) + sf.line( + [ + [ + [track.Longitude[n + 1], track.Latitude[n + 1]], + [track.Longitude[n + 1], track.Latitude[n + 1]], + ] + ] + ) + sf.record(*track.data[n + 1]) try: - sf.save(outputFile) + sf.close() except shapefile.ShapefileException: LOG.exception("Cannot save shape file: {0}".format(outputFile)) raise -if __name__ == '__main__': + +if __name__ == "__main__": from Utilities.loadData import loadTrackFile from Utilities.config import ConfigParser @@ -276,20 +339,24 @@ def tracks2line(tracks, outputFile, dissolve=False, netcdf_format=False): # pylint: disable=C0103 parser = argparse.ArgumentParser() - parser.add_argument('-c', '--config_file', help='Input configuration file') - parser.add_argument('-f', '--file', help='Input TC track file') - parser.add_argument('-s', '--source', 
-                        help='Input TC track file source format')
-    parser.add_argument('-v', '--verbose',
-                        help='Print log messages to STDOUT',
-                        action='store_true')
+    parser.add_argument("-c", "--config_file",
+                        help="Input configuration file")
+    parser.add_argument("-f", "--file",
+                        help="Input TC track file")
+    parser.add_argument("-s", "--source",
+                        help="Input TC track file source format")
+    parser.add_argument(
+        "-v", "--verbose",
+        help="Print log messages to STDOUT",
+        action="store_true"
+    )

     args = parser.parse_args()

     config_file = args.config_file
     config = ConfigParser()
     config.read(config_file)

-    logfile = config.get('Logging', 'LogFile')
+    logfile = config.get("Logging", "LogFile")
     logdir = dirname(realpath(logfile))

     # If log file directory does not exist, create it
@@ -297,11 +364,11 @@ def tracks2line(tracks, outputFile, dissolve=False, netcdf_format=False):
     try:
         os.makedirs(logdir)
     except OSError:
-        logfile = pjoin(os.getcwd(), 'tracks2shp.log')
+        logfile = pjoin(os.getcwd(), "tracks2shp.log")

-    logLevel = config.get('Logging', 'LogLevel')
-    verbose = config.getboolean('Logging', 'Verbose')
-    datestamp = config.getboolean('Logging', 'Datestamp')
+    logLevel = config.get("Logging", "LogLevel")
+    verbose = config.getboolean("Logging", "Verbose")
+    datestamp = config.getboolean("Logging", "Datestamp")

     if args.verbose:
         verbose = True
@@ -311,22 +378,23 @@ def tracks2line(tracks, outputFile, dissolve=False, netcdf_format=False):
     if args.file:
         track_file = args.file
     else:
-        track_file = config.get('DataProcess', 'InputFile')
+        track_file = config.get("DataProcess", "InputFile")

     if args.source:
         source = args.source
     else:
-        source = config.get('DataProcess', 'Source')
+        source = config.get("DataProcess", "Source")

     output_path = dirname(realpath(track_file))
     filename, ext = splitext(track_file)
-    pt_output_file = filename + '_pt.shp'
-    line_output_file = filename + '_line.shp'
-    dissolve_output_file = filename + '_dissolve.shp'
+    pt_output_file = filename + "_pt.shp"
+    line_output_file = filename + "_line.shp"
+    dissolve_output_file = filename + "_dissolve.shp"

     if track_file.endswith(".nc"):
         from Utilities.track import ncReadTrackData
+
         tracks = ncReadTrackData(track_file)
         netcdf_format = True
@@ -341,6 +409,8 @@ def tracks2line(tracks, outputFile, dissolve=False, netcdf_format=False):
     add_category(tracks)
     tracks2point(tracks, pt_output_file, netcdf_format=netcdf_format)
     tracks2line(tracks, line_output_file, netcdf_format=netcdf_format)
-    tracks2line(tracks, dissolve_output_file, dissolve=True, netcdf_format=netcdf_format)
+    tracks2line(
+        tracks, dissolve_output_file,
+        dissolve=True, netcdf_format=netcdf_format
+    )
     LOG.info("Completed tracks2shp")
--