Skip to content

Commit

Permalink
Merge pull request #122 from AISViz/noaa-ingestion
Browse files Browse the repository at this point in the history
Noaa ingestion
  • Loading branch information
gabrielspadon authored Jan 17, 2025
2 parents befd69b + d35a336 commit 5a02528
Show file tree
Hide file tree
Showing 7 changed files with 1,383 additions and 30 deletions.
2 changes: 1 addition & 1 deletion aisdb/database/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def decode_msgs(filepaths, dbconn, source, vacuum=False, skip_checksum=False,

if verbose:
print("checking file dates...")
filedates = [getfiledate(f) for f in raw_files]
filedates = [getfiledate(f, source) for f in raw_files]
months = [
month.strftime("%Y%m") for month in rrule(
freq=MONTHLY,
Expand Down
11 changes: 7 additions & 4 deletions aisdb/proc_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,14 +313,16 @@ def glob_files(dirpath, ext='.txt', keyorder=lambda key: key):
return sorted(extpaths, key=keyorder)


def getfiledate(filename):
def getfiledate(filename, source=None):
''' attempt to parse the first valid epoch timestamp from .nm4 data file.
timestamp will be returned as :class:`datetime.date` if successful,
otherwise will return False if no date could be found
args:
filename (string)
raw AIS data file in .nm4 format
raw AIS data file in .nm4 or .csv format
source (string)
data source of CSV file; time column handle is different if "NOAA" (case insensitive) is specified
'''
filesize = os.path.getsize(filename)
if filesize == 0: # pragma: no cover
Expand All @@ -330,15 +332,16 @@ def getfiledate(filename):
extension = os.path.splitext(filename)[1].lower()

if extension == ".csv":
# if filename.lower()[-3:] == "csv":
reader = csv.reader(f)
try:
head = next(reader)
row1 = next(reader)
except StopIteration:
return False
rowdict = {a: b for a, b in zip(head, row1)}
fdate = datetime.strptime(rowdict['Time'], '%Y%m%d_%H%M%S').date()
time_key = 'BaseDateTime' if source and "noaa" in source.lower() else 'Time'
time_format = '%Y-%m-%dT%H:%M:%S' if time_key == 'BaseDateTime' else '%Y%m%d_%H%M%S'
fdate = datetime.strptime(rowdict[time_key], time_format).date()
return fdate

elif extension == ".nm4":
Expand Down
28 changes: 28 additions & 0 deletions aisdb/tests/test_001_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,31 @@ def test_compare_sqlite_postgres_query_output(tmpdir):

for a, b in zip(tracks1, tracks2):
assert a['lat'] == b['lat']

def test_noaa_data_ingest_compare(tmpdir):
    ''' ingest the same NOAA CSV sample into PostgreSQL and SQLite, query both
        databases over January 2023, and check that the resulting tracks
        carry the same timestamps

        args:
            tmpdir
                temporary directory in which the SQLite database file
                is created
    '''
    testdatacsv = os.path.join(os.path.dirname(__file__), "testdata", "test_data_noaa_20230101.csv")
    filepaths = [testdatacsv]

    testdbpath = os.path.join(tmpdir, "test_sqlite_noaa.db")

    start_time = datetime(2023, 1, 1)
    end_time = datetime(2023, 1, 31)

    with PostgresDBConn(conn_information) as pgdb, DBConn(testdbpath) as sqlitedb:
        # load the identical input file into both backends
        decode_msgs(filepaths, dbconn=pgdb, source='NOAA', vacuum=False, verbose=True, skip_checksum=True)
        pgdb.commit()

        decode_msgs(filepaths, dbconn=sqlitedb, source='NOAA', vacuum=False, verbose=True, skip_checksum=True)
        sqlitedb.commit()

        rowgen1 = DBQuery(dbconn=sqlitedb, start=start_time, end=end_time,
                          callback=sqlfcn_callbacks.in_timerange_validmmsi, ).gen_qry(reaggregate_static=True)

        rowgen2 = DBQuery(dbconn=pgdb, start=start_time, end=end_time,
                          callback=sqlfcn_callbacks.in_timerange_validmmsi, ).gen_qry(reaggregate_static=True)

        # materialize the tracks while both connections are still open
        tracks1 = list(TrackGen(rowgen1, decimate=False))
        tracks2 = list(TrackGen(rowgen2, decimate=False))

        # zip() stops at the shorter sequence, so a backend that returned
        # fewer tracks would otherwise go unnoticed by the loop below
        assert len(tracks1) == len(tracks2)

        for a, b in zip(tracks1, tracks2):
            assert a['time'] == b['time']
1 change: 1 addition & 0 deletions aisdb/tests/test_013_proc_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def test_glob_files():
def test_getfiledate():
    ''' getfiledate should find a date in each sample file: a .nm4 file,
        a .csv file in the default layout, and a NOAA-format .csv file
    '''
    testdata = os.path.join(os.path.dirname(__file__), "testdata")

    # getfiledate is documented to return False when no date could be found,
    # so assert on the result instead of only checking that the call
    # does not raise
    assert aisdb.proc_util.getfiledate(os.path.join(testdata, "test_data_20211101.nm4"))
    assert aisdb.proc_util.getfiledate(os.path.join(testdata, "test_data_20210701.csv"))
    assert aisdb.proc_util.getfiledate(os.path.join(testdata, "test_data_noaa_20230101.csv"), source='NOAA')


def test_binarysearch():
Expand Down
Loading

0 comments on commit 5a02528

Please sign in to comment.