Skip to content

Commit 382e690

Browse files
Extract activity_datetime from FLUX messages
1 parent f94c1e5 commit 382e690

File tree

3 files changed

+253
-6
lines changed

3 files changed

+253
-6
lines changed

datascience/src/pipeline/parsers/flux/flux.py

Lines changed: 37 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from xml.etree.ElementTree import ParseError
1010

1111
import pandas as pd
12+
from dateutil.parser import parse
1213

1314
from src.pipeline.parsers.flux.log_parsers import (
1415
null_parser,
@@ -68,7 +69,6 @@ def get_fishing_activity_type(fishing_activity: ET.Element) -> FluxFishingActivi
6869

6970

7071
def get_fa_report_type(fa_report_document: ET.Element) -> FluxFAReportDocumentType:
71-
7272
report_type = get_text(
7373
fa_report_document, './/ram:TypeCode[@listID="FLUX_FA_REPORT_TYPE"]'
7474
)
@@ -169,7 +169,6 @@ def get_operation_type(xml_element):
169169

170170

171171
def parse_metadata(fa_report_document: xml.etree.ElementTree.Element):
172-
173172
metadata = {
174173
"operation_type": get_operation_type(fa_report_document),
175174
"report_id": get_text(fa_report_document, './/ram:ID[@schemeID="UUID"]'),
@@ -208,15 +207,18 @@ def parse_fa_report_document(fa_report_document: ET.Element):
208207

209208
children = tagged_children(fa_report_document)
210209

210+
activity_datetimes_utc = []
211211
if "SpecifiedFishingActivity" in children:
212212
log_types = set()
213213
values = []
214214
for specified_fishing_activity in children["SpecifiedFishingActivity"]:
215-
log_type, value = parse_specified_fishing_activity(
215+
log_type, activity_datetime_utc, value = parse_specified_fishing_activity(
216216
specified_fishing_activity, report_type
217217
)
218218
log_types.add(log_type)
219219
values.append(value)
220+
if isinstance(activity_datetime_utc, datetime):
221+
activity_datetimes_utc.append(activity_datetime_utc)
220222
try:
221223
assert len(log_types) == 1
222224
except AssertionError:
@@ -235,7 +237,16 @@ def parse_fa_report_document(fa_report_document: ET.Element):
235237
else:
236238
data = dict()
237239

238-
fa_report_document_data = {**metadata, **data}
240+
if activity_datetimes_utc:
241+
activity_datetime_utc = min(activity_datetimes_utc)
242+
else:
243+
activity_datetime_utc = None
244+
245+
fa_report_document_data = {
246+
"activity_datetime_utc": activity_datetime_utc,
247+
**metadata,
248+
**data,
249+
}
239250

240251
return fa_report_document_data
241252

@@ -274,7 +285,28 @@ def parse_specified_fishing_activity(
274285
f"Could not find appropriate parser for log type {log_type}: ", e
275286
)
276287
value = parser(fishing_activity)
277-
return log_type, value
288+
289+
datetime_string = get_text(
290+
fishing_activity, ".//ram:OccurrenceDateTime/udt:DateTime"
291+
)
292+
if not datetime_string:
293+
datetime_string = get_text(
294+
fishing_activity,
295+
"./ram:SpecifiedDelimitedPeriod/ram:EndDateTime/udt:DateTime",
296+
)
297+
298+
if datetime_string:
299+
try:
300+
activity_datetime_utc = parse(datetime_string).replace(tzinfo=None)
301+
except Exception as e:
302+
logging.error(
303+
f"Cound not parse datetime string {datetime_string} with error: {e}"
304+
)
305+
activity_datetime_utc = None
306+
else:
307+
activity_datetime_utc = None
308+
309+
return log_type, activity_datetime_utc, value
278310

279311

280312
def get_list_fa_report_documents(fa_report_message: ET.Element) -> List[ET.Element]:

datascience/tests/test_pipeline/test_flows/test_logbook.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -345,4 +345,4 @@ def test_flow(mock_move, reset_test_data):
345345
final_logbook_reports.is_test_message, "operation_number"
346346
].values[0]
347347
) == "FRA20200321502645"
348-
assert final_logbook_reports.activity_datetime_utc.notnull().sum() == 13
348+
assert final_logbook_reports.activity_datetime_utc.notnull().sum() == 14

0 commit comments

Comments
 (0)