Skip to content

Commit 17301b1

Browse files
committed
Added validation for date and time input values. Issue #39
1 parent 8a6e57d commit 17301b1

File tree

1 file changed

+82
-8
lines changed

1 file changed

+82
-8
lines changed

tools/scripts/csv2pt.py

Lines changed: 82 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,8 @@ def __init__(self, file_path, veg_codes):
165165
self.reqd_columns = REQD_COLUMNS
166166
self.sourceLatCol = sourceLatCol
167167
self.sourceLonCol = sourceLonCol
168+
self.sourceTimeCol = sourceTimeCol
169+
self.sourceDateCol = sourceDateCol
168170
self.veg_codes = veg_codes
169171

170172
@property
@@ -174,9 +176,10 @@ def valid(self):
174176
No missing base columns
175177
At least one vegetation column
176178
No malformed latitude or longitude values
179+
No malformed date or time values
177180
"""
178181
if self.file_exists and len(self.missing_columns) == 0 and len(self.veg_columns) > 0 and \
179-
len(self.lat_errors) == 0 and len(self.lon_errors) == 0:
182+
len(self.lat_errors) == 0 and len(self.lon_errors) == 0 and len(self.time_errors) == 0 and len(self.date_errors) == 0:
180183
return True
181184
else:
182185
return False
@@ -233,7 +236,6 @@ def veg_columns(self):
233236
_veg_columns = set(self.veg_codes).intersection(set(self.all_columns))
234237
return list(_veg_columns)
235238

236-
237239
@property
238240
def columns(self):
239241
"""combined list of base and vegetation columns in source csv file"""
@@ -258,13 +260,31 @@ def lon_errors(self):
258260
except:
259261
return None
260262

263+
@property
def time_errors(self):
    """list of rows in source csv file with erroneous time values

    Returns the result of self._validate_time() for self.sourceTimeCol,
    or None when the validation itself could not be carried out.
    """
    try:
        return self._validate_time(self.sourceTimeCol)
    except Exception:
        # Keep the historical "None means validation could not run" result,
        # but no longer swallow KeyboardInterrupt / SystemExit the way a
        # bare ``except:`` does.
        # NOTE(review): `valid` calls len() on this value, so a None here
        # raises TypeError there -- confirm that is the intended outcome.
        return None
271+
272+
@property
def date_errors(self):
    """list of rows in source csv file with erroneous date values

    Returns the result of self._validate_date() for self.sourceDateCol,
    or None when the validation itself could not be carried out.
    """
    try:
        return self._validate_date(self.sourceDateCol)
    except Exception:
        # Keep the historical "None means validation could not run" result,
        # but no longer swallow KeyboardInterrupt / SystemExit the way a
        # bare ``except:`` does.
        # NOTE(review): `valid` calls len() on this value, so a None here
        # raises TypeError there -- confirm that is the intended outcome.
        return None
280+
261281
def _validate_latlon(self, col):
262282
"""Function to validate lat or lon values based on expected pattern"""
263283
# Assumes input in format ##d##.####' Dir or ##d##' Dir
264284
# latlon_pattern = "[0-9]+[d][0-9]+[.][0-9]+\' [nsewNSEW]"
265285
latlon_pattern = "[0-9]+[d][0-9]+[.]?[0-9]+\' [nsewNSEW]"
266-
error_rows = []
267-
#
286+
error_rows = [] # initialize list of rows with errors
287+
# Loop through data and validate lat/long value
268288
for i, row in enumerate(self.rows):
269289
csv_row = i + 1
270290
coord = row[col]
@@ -282,6 +302,48 @@ def _validate_latlon(self, col):
282302

283303
return error_rows
284304

305+
def _validate_time(self, col):
    """
    Function to validate time values based on expected pattern
    Input can be in 12-hour (AM/PM) or 24-hour format, written as h:m:s
    :param col: key used to look up the time value in each row of self.rows
    :return: list of 1-based positions (within self.rows) of rows whose
             time value does not match the expected pattern
    """
    # The patterns do not depend on the row, so build them once
    time24hr_pattern = re.compile("^(2[0-3]|[01]?[0-9]):([0-5]?[0-9]):([0-5]?[0-9])$")
    time12hr_pattern = re.compile("^(1[0-2]|0?[1-9]):([0-5]?[0-9]):([0-5]?[0-9])( ?[AP]M)?$")

    error_rows = []  # initialize list of rows with errors
    # Loop through data and validate time values
    for csv_row, row in enumerate(self.rows, 1):
        time_of_survey = row[col]
        try:
            # An "M" can only come from an AM/PM suffix in a well-formed
            # value, so its presence selects the 12-hour pattern
            if "M" in time_of_survey:
                pattern = time12hr_pattern
            else:
                pattern = time24hr_pattern
            is_valid = pattern.search(time_of_survey) is not None
        except TypeError:
            # Non-text value (e.g. None for a missing field): report the
            # row as bad instead of aborting validation of the whole file
            is_valid = False
        if not is_valid:
            error_rows.append(csv_row)
    return error_rows
327+
328+
def _validate_date(self, col):
    """
    Function to validate date values based on expected pattern
    Input expected to be in this format: m/d/yyyy or mm/dd/yyyy
    :param col: key used to look up the date value in each row of self.rows
    :return: list of 1-based positions (within self.rows) of rows whose
             date value is malformed or is not a real calendar date
    """
    error_rows = []  # initialize list of rows with errors
    # Loop through data and validate date values
    for csv_row, row in enumerate(self.rows, 1):
        date_of_survey = row[col]
        try:
            month, day, year = date_of_survey.split('/')
            year = year.strip()
            # Enforce the documented yyyy form: without this check a value
            # such as 7/4/17 is silently accepted as the year 0017
            if len(year) != 4 or not year.isdigit():
                raise ValueError("year must have four digits")
            # datetime.date rejects impossible dates (month 13, Feb 30, ...)
            datetime.date(int(year), int(month), int(day))
        except (AttributeError, TypeError, ValueError, OverflowError):
            # AttributeError/TypeError: value is not text (e.g. None)
            # ValueError: wrong number of parts, non-numeric part, bad date
            # OverflowError: numeric part too large for datetime.date
            error_rows.append(csv_row)
    return error_rows
346+
285347
@property
286348
def dataframe(self):
287349
if self.valid:
@@ -483,7 +545,6 @@ def transect_video0(self):
483545
# print df_max.loc[tranCol > 1]
484546
return df_max
485547

486-
487548
def _validate_data(self):
488549
# Check validation properties
489550
if self.video_zero or self.null_video or self.null_veg or self.video_gt1 \
@@ -663,6 +724,14 @@ def write_csverr(self, csvsource):
663724
err_type = "Bad Longitude Values"
664725
details = 'Rows: ' + ';'.join(str(r) for r in csvsource.lon_errors)
665726
self.fh.write(",".join((csv_dir, csv_file, err_type, details)) + "\n")
727+
if csvsource.time_errors:
728+
err_type = "Bad Time Values"
729+
details = 'Rows: ' + ';'.join(str(r) for r in csvsource.time_errors)
730+
self.fh.write(",".join((csv_dir, csv_file, err_type, details)) + "\n")
731+
if csvsource.date_errors:
732+
err_type = "Bad Date Values"
733+
details = 'Rows: ' + ';'.join(str(r) for r in csvsource.date_errors)
734+
self.fh.write(",".join((csv_dir, csv_file, err_type, details)) + "\n")
666735

667736
def write_direrr(self, csv_dir):
668737

@@ -693,7 +762,7 @@ def write_datawarn(self, csvdata):
693762
self.fh.write(",".join((csv_dir, csv_file, err_type, details)) + "\n")
694763
if csvdata.null_video:
695764
err_type = "Null Video Values"
696-
# print ', '.join(str(x) for x in list_of_ints)
765+
# print(', '.join(str(x) for x in list_of_ints))
697766
details = 'Rows: ' + ';'.join(str(i) for i in csvdata.null_video)
698767
self.fh.write(",".join((csv_dir, csv_file, err_type, details)) + "\n")
699768
if csvdata.null_veg:
@@ -773,7 +842,7 @@ def main(in_dir, sites_file, vegcode_table, out_gdb, err_dir):
773842
if transectData.warnings:
774843
msg("Data Validation Warnings.\nWriting to log file: {1}".format(csvSource.file_path, warning_log.log_file))
775844
warning_log.write_datawarn(transectData)
776-
print "Creating Point feature class {0}".format(fc_path)
845+
msg("Creating Point feature class {0}".format(fc_path))
777846
ptFC = PointFC(transectData.nparray, fc_path)
778847
ptFC.create_fc()
779848
else:
@@ -804,21 +873,26 @@ def main(in_dir, sites_file, vegcode_table, out_gdb, err_dir):
804873
# Input parameter 1: Parent directory for site data folders and input csv files
805874
# in_dir = "Y:/projects/dnr_svmp2016/data/2014_test/site_folders"
806875
in_dir = "Y:/projects/dnr_svmp2016/data/examples"
876+
# in_dir = "Y:/projects/dnr_svmp2016/data/IslandCoMRC"
807877

808878
# Input parameter 2: Text file with list of sites to process
809879
sites_file = os.path.join("Y:/projects/dnr_svmp2016/data/examples", "sites2process_examples.txt")
880+
# sites_file = os.path.join("Y:/projects/dnr_svmp2016/data/IslandCoMRC", "sites2process_IslandCoMRC.txt")
810881

811882
# Input parameter 3: Table with vegetation codes
812883
vegcode_table = "Y:/projects/dnr_svmp2016/db/SVMP_2000_2015_DB.v4_20170109/SVMP_DB_v4_20170109.mdb/veg_codes"
884+
# vegcode_table = "Y:/projects/dnr_svmp2016/db/SVMP_DB_v5.2_20170815.mdb/veg_codes"
813885

814886
# Input parameter 4: Output Geodatabase to store point feature classes
815887
out_gdb = "Y:/projects/dnr_svmp2016/data/examples/examples_pgdb.mdb"
888+
# out_gdb = "Y:/projects/dnr_svmp2016/data/IslandCoMRC/IslandCoMRC_transects.mdb"
816889

817890
# Input parameter 5: Error Log directory
818891
err_dir = in_dir
892+
# err_dir = "Y:/projects/dnr_svmp2016/data/IslandCoMRC/err_logs"
819893

820894
main(in_dir, sites_file, vegcode_table, out_gdb, err_dir)
821895

822896
t1 = time.time()
823897

824-
print ("Total time elapsed is: %s seconds" %str(t1-t0))
898+
print("Total time elapsed is: %s seconds" %str(t1-t0))

0 commit comments

Comments
 (0)