@@ -9,6 +9,7 @@
 from xml.etree.ElementTree import ParseError
 
 import pandas as pd
+from dateutil.parser import parse
 
 from src.pipeline.parsers.flux.log_parsers import (
     null_parser,
@@ -68,7 +69,6 @@ def get_fishing_activity_type(fishing_activity: ET.Element) -> FluxFishingActivi
 
 
 def get_fa_report_type(fa_report_document: ET.Element) -> FluxFAReportDocumentType:
-
     report_type = get_text(
         fa_report_document, './/ram:TypeCode[@listID="FLUX_FA_REPORT_TYPE"]'
     )
@@ -169,7 +169,6 @@ def get_operation_type(xml_element):
 
 
 def parse_metadata(fa_report_document: xml.etree.ElementTree.Element):
-
     metadata = {
         "operation_type": get_operation_type(fa_report_document),
         "report_id": get_text(fa_report_document, './/ram:ID[@schemeID="UUID"]'),
@@ -208,15 +207,18 @@ def parse_fa_report_document(fa_report_document: ET.Element):
 
     children = tagged_children(fa_report_document)
 
+    activity_datetimes_utc = []
     if "SpecifiedFishingActivity" in children:
         log_types = set()
         values = []
         for specified_fishing_activity in children["SpecifiedFishingActivity"]:
-            log_type, value = parse_specified_fishing_activity(
+            log_type, activity_datetime_utc, value = parse_specified_fishing_activity(
                 specified_fishing_activity, report_type
             )
             log_types.add(log_type)
             values.append(value)
+            if isinstance(activity_datetime_utc, datetime):
+                activity_datetimes_utc.append(activity_datetime_utc)
         try:
             assert len(log_types) == 1
         except AssertionError:
@@ -235,7 +237,16 @@ def parse_fa_report_document(fa_report_document: ET.Element):
     else:
         data = dict()
 
-    fa_report_document_data = {**metadata, **data}
+    if activity_datetimes_utc:
+        activity_datetime_utc = min(activity_datetimes_utc)
+    else:
+        activity_datetime_utc = None
+
+    fa_report_document_data = {
+        "activity_datetime_utc": activity_datetime_utc,
+        **metadata,
+        **data,
+    }
 
     return fa_report_document_data
 
@@ -274,7 +285,28 @@ def parse_specified_fishing_activity(
             f"Could not find appropriate parser for log type {log_type}: ", e
         )
     value = parser(fishing_activity)
-    return log_type, value
+
+    datetime_string = get_text(
+        fishing_activity, ".//ram:OccurrenceDateTime/udt:DateTime"
+    )
+    if not datetime_string:
+        datetime_string = get_text(
+            fishing_activity,
+            "./ram:SpecifiedDelimitedPeriod/ram:EndDateTime/udt:DateTime",
+        )
+
+    if datetime_string:
+        try:
+            activity_datetime_utc = parse(datetime_string).replace(tzinfo=None)
+        except Exception as e:
+            logging.error(
+                f"Could not parse datetime string {datetime_string} with error: {e}"
+            )
+            activity_datetime_utc = None
+    else:
+        activity_datetime_utc = None
+
+    return log_type, activity_datetime_utc, value
 
 
 def get_list_fa_report_documents(fa_report_message: ET.Element) -> List[ET.Element]:
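Note on the new datetime extraction: parse_specified_fishing_activity now also returns the activity's occurrence datetime (falling back to the delimited period's end datetime), and parse_fa_report_document keeps the earliest one per report. A minimal sketch of the dateutil step, using an illustrative ISO 8601 string rather than a value taken from a real FLUX report:

from datetime import datetime
from dateutil.parser import parse

# Sample udt:DateTime-style string (illustrative only).
datetime_string = "2023-03-07T14:22:00Z"

# dateutil yields a timezone-aware datetime; dropping tzinfo matches the
# naive-UTC convention used in the diff above.
activity_datetime_utc = parse(datetime_string).replace(tzinfo=None)
assert activity_datetime_utc == datetime(2023, 3, 7, 14, 22)

If neither element is present, activity_datetime_utc stays None and the report-level value defaults to None as well.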