Skip to content

Commit d2135d6

Browse files
Ajout d'une colonne activity_datetime_utc dans la table logbook reports (#3548)
## Linked issues - Resolve #3209
2 parents 6d739e7 + 49c5db5 commit d2135d6

File tree

8 files changed

+374
-45
lines changed

8 files changed

+374
-45
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
ALTER TABLE public.logbook_reports
2+
ADD COLUMN activity_datetime_utc TIMESTAMP WITHOUT TIME ZONE;
3+
4+
-- Query to run after the production release (MEP, "mise en production"), so that the release itself does not cause downtime
5+
6+
-- UPDATE logbook_reports
7+
-- SET activity_datetime_utc = CASE
8+
-- WHEN log_type = 'DEP' THEN (value->>'departureDatetimeUtc')::TIMESTAMPTZ AT TIME ZONE 'UTC'
9+
-- WHEN log_type = 'NOT-COE' THEN (value->>'effortZoneEntryDatetimeUtc')::TIMESTAMPTZ AT TIME ZONE 'UTC'
10+
-- WHEN log_type = 'COE' THEN (value->>'effortZoneEntryDatetimeUtc')::TIMESTAMPTZ AT TIME ZONE 'UTC'
11+
-- WHEN log_type = 'FAR' THEN (SELECT MIN((haul->>'farDatetimeUtc')::TIMESTAMPTZ) AT TIME ZONE 'UTC' FROM jsonb_array_elements(value->'hauls') haul)
12+
-- WHEN log_type = 'CPS' THEN (value->>'cpsDatetimeUtc')::TIMESTAMPTZ AT TIME ZONE 'UTC'
13+
-- WHEN log_type = 'DIS' THEN (value->>'discardDatetimeUtc')::TIMESTAMPTZ AT TIME ZONE 'UTC'
14+
-- WHEN log_type = 'NOT-COX' THEN (value->>'effortZoneExitDatetimeUtc')::TIMESTAMPTZ AT TIME ZONE 'UTC'
15+
-- WHEN log_type = 'COX' THEN (value->>'effortZoneExitDatetimeUtc')::TIMESTAMPTZ AT TIME ZONE 'UTC'
16+
-- WHEN log_type = 'CRO' THEN (value->>'effortZoneExitDatetimeUtc')::TIMESTAMPTZ AT TIME ZONE 'UTC'
17+
-- WHEN log_type = 'EOF' THEN (value->>'endOfFishingDatetimeUtc')::TIMESTAMPTZ AT TIME ZONE 'UTC'
18+
-- WHEN log_type = 'PNO' THEN (value->>'predictedArrivalDatetimeUtc')::TIMESTAMPTZ AT TIME ZONE 'UTC'
19+
-- WHEN log_type = 'LAN' THEN (value->>'landingDatetimeUtc')::TIMESTAMPTZ AT TIME ZONE 'UTC'
20+
-- WHEN log_type = 'RTP' THEN (value->>'returnDatetimeUtc')::TIMESTAMPTZ AT TIME ZONE 'UTC'
21+
-- ELSE NULL
22+
-- END
23+
-- WHERE log_type IS NOT NULL

datascience/src/pipeline/parsers/ers/log_parsers.py

Lines changed: 83 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
)
1010
from src.pipeline.parsers.utils import (
1111
get_root_tag,
12-
make_datetime_json_serializable,
12+
make_datetime,
13+
serialize_datetime,
1314
tagged_children,
1415
try_float,
1516
)
@@ -23,7 +24,8 @@ def parse_dep(dep):
2324
date = dep.get("DA")
2425
time = dep.get("TI")
2526
# cannot use DateTime because the data needs to be json serializable
26-
departure_datetime_utc = make_datetime_json_serializable(date, time)
27+
activity_datetime_utc = make_datetime(date, time)
28+
departure_datetime_utc = serialize_datetime(activity_datetime_utc)
2729

2830
value = {
2931
"departureDatetimeUtc": departure_datetime_utc,
@@ -41,15 +43,20 @@ def parse_dep(dep):
4143
species_onboard = [parse_spe(spe) for spe in children["SPE"]]
4244
value["speciesOnboard"] = species_onboard
4345

44-
data = {"log_type": "DEP", "value": value}
46+
data = {
47+
"activity_datetime_utc": activity_datetime_utc,
48+
"log_type": "DEP",
49+
"value": value,
50+
}
4551

4652
return data
4753

4854

4955
def parse_far(far):
5056
date = far.get("DA")
5157
time = far.get("TI")
52-
far_datetime_utc = make_datetime_json_serializable(date, time)
58+
activity_datetime_utc = make_datetime(date, time)
59+
far_datetime_utc = serialize_datetime(activity_datetime_utc)
5360

5461
value = {"farDatetimeUtc": far_datetime_utc}
5562

@@ -72,15 +79,20 @@ def parse_far(far):
7279
value["latitude"] = try_float(lat)
7380
value["longitude"] = try_float(lon)
7481

75-
data = {"log_type": "FAR", "value": {"hauls": [value]}}
82+
data = {
83+
"activity_datetime_utc": activity_datetime_utc,
84+
"log_type": "FAR",
85+
"value": {"hauls": [value]},
86+
}
7687

7788
return data
7889

7990

8091
def parse_ecps(ecps):
8192
date = ecps.get("DA")
8293
time = ecps.get("TI")
83-
far_datetime_utc = make_datetime_json_serializable(date, time)
94+
activity_datetime_utc = make_datetime(date, time)
95+
far_datetime_utc = serialize_datetime(activity_datetime_utc)
8496

8597
value = {"cpsDatetimeUtc": far_datetime_utc}
8698

@@ -103,15 +115,20 @@ def parse_ecps(ecps):
103115
value["latitude"] = try_float(lat)
104116
value["longitude"] = try_float(lon)
105117

106-
data = {"log_type": "CPS", "value": value}
118+
data = {
119+
"activity_datetime_utc": activity_datetime_utc,
120+
"log_type": "CPS",
121+
"value": value,
122+
}
107123

108124
return data
109125

110126

111127
def parse_dis(dis):
112128
date = dis.get("DA")
113129
time = dis.get("TI")
114-
discard_datetime_utc = make_datetime_json_serializable(date, time)
130+
activity_datetime_utc = make_datetime(date, time)
131+
discard_datetime_utc = serialize_datetime(activity_datetime_utc)
115132

116133
value = {"discardDatetimeUtc": discard_datetime_utc}
117134

@@ -121,15 +138,20 @@ def parse_dis(dis):
121138
catches = [parse_spe(spe) for spe in children["SPE"]]
122139
value["catches"] = catches
123140

124-
data = {"log_type": "DIS", "value": value}
141+
data = {
142+
"activity_datetime_utc": activity_datetime_utc,
143+
"log_type": "DIS",
144+
"value": value,
145+
}
125146

126147
return data
127148

128149

129150
def parse_coe(coe):
130151
date = coe.get("DA")
131152
time = coe.get("TI")
132-
effort_zone_entry_datetime_utc = make_datetime_json_serializable(date, time)
153+
activity_datetime_utc = make_datetime(date, time)
154+
effort_zone_entry_datetime_utc = serialize_datetime(activity_datetime_utc)
133155

134156
children = tagged_children(coe)
135157

@@ -154,15 +176,20 @@ def parse_coe(coe):
154176
value["latitudeEntered"] = try_float(lat)
155177
value["longitudeEntered"] = try_float(lon)
156178

157-
data = {"log_type": "COE", "value": value}
179+
data = {
180+
"activity_datetime_utc": activity_datetime_utc,
181+
"log_type": "COE",
182+
"value": value,
183+
}
158184

159185
return data
160186

161187

162188
def parse_cox(cox):
163189
date = cox.get("DA")
164190
time = cox.get("TI")
165-
effort_zone_exit_datetime_utc = make_datetime_json_serializable(date, time)
191+
activity_datetime_utc = make_datetime(date, time)
192+
effort_zone_exit_datetime_utc = serialize_datetime(activity_datetime_utc)
166193

167194
children = tagged_children(cox)
168195

@@ -187,7 +214,11 @@ def parse_cox(cox):
187214
value["latitudeExited"] = try_float(lat)
188215
value["longitudeExited"] = try_float(lon)
189216

190-
data = {"log_type": "COX", "value": value}
217+
data = {
218+
"activity_datetime_utc": activity_datetime_utc,
219+
"log_type": "COX",
220+
"value": value,
221+
}
191222

192223
return data
193224

@@ -197,6 +228,8 @@ def parse_cro(cro):
197228

198229
value = {}
199230

231+
activity_datetime_utc = None
232+
200233
if "COE" in children:
201234
assert len(children["COE"]) == 1
202235
coe = children["COE"][0]
@@ -207,24 +240,31 @@ def parse_cro(cro):
207240
assert len(children["COX"]) == 1
208241
cox = children["COX"][0]
209242
cox_data = parse_cox(cox)
243+
activity_datetime_utc = cox_data["activity_datetime_utc"]
210244
cox_value = cox_data["value"]
211245
value = {**value, **cox_value}
212246

213-
data = {"log_type": "CRO", "value": value}
247+
data = {
248+
"activity_datetime_utc": activity_datetime_utc,
249+
"log_type": "CRO",
250+
"value": value,
251+
}
214252
return data
215253

216254

217255
def parse_pno(pno):
218256
date = pno.get("PD")
219257
time = pno.get("PT")
220-
predicted_arrival_datetime_utc = make_datetime_json_serializable(date, time)
258+
activity_datetime_utc = make_datetime(date, time)
259+
predicted_arrival_datetime_utc = serialize_datetime(activity_datetime_utc)
221260

222261
date = pno.get("DA")
223262
time = pno.get("TI")
224-
predicted_landing_datetime_utc = make_datetime_json_serializable(date, time)
263+
predicted_landing_datetime_utc = serialize_datetime(make_datetime(date, time))
225264

226265
start_date = pno.get("DS")
227-
trip_start_date = make_datetime_json_serializable(start_date, None)
266+
activity_datetime_utc = make_datetime(start_date, None)
267+
trip_start_date = serialize_datetime(activity_datetime_utc)
228268

229269
children = tagged_children(pno)
230270

@@ -257,15 +297,20 @@ def parse_pno(pno):
257297
value["latitude"] = try_float(lat)
258298
value["longitude"] = try_float(lon)
259299

260-
data = {"log_type": "PNO", "value": value}
300+
data = {
301+
"activity_datetime_utc": activity_datetime_utc,
302+
"log_type": "PNO",
303+
"value": value,
304+
}
261305

262306
return data
263307

264308

265309
def parse_lan(lan):
266310
date = lan.get("DA")
267311
time = lan.get("TI")
268-
landing_datetime_utc = make_datetime_json_serializable(date, time)
312+
activity_datetime_utc = make_datetime(date, time)
313+
landing_datetime_utc = serialize_datetime(activity_datetime_utc)
269314

270315
value = {
271316
"landingDatetimeUtc": landing_datetime_utc,
@@ -279,24 +324,34 @@ def parse_lan(lan):
279324
catches = [parse_spe(spe) for spe in children["SPE"]]
280325
value["catchLanded"] = catches
281326

282-
data = {"log_type": "LAN", "value": value}
327+
data = {
328+
"activity_datetime_utc": activity_datetime_utc,
329+
"log_type": "LAN",
330+
"value": value,
331+
}
283332

284333
return data
285334

286335

287336
def parse_eof(eof):
288337
date = eof.get("DA")
289338
time = eof.get("TI")
290-
end_of_fishing_datetime_utc = make_datetime_json_serializable(date, time)
339+
activity_datetime_utc = make_datetime(date, time)
340+
end_of_fishing_datetime_utc = serialize_datetime(activity_datetime_utc)
291341
value = {"endOfFishingDatetimeUtc": end_of_fishing_datetime_utc}
292-
data = {"log_type": "EOF", "value": value}
342+
data = {
343+
"activity_datetime_utc": activity_datetime_utc,
344+
"log_type": "EOF",
345+
"value": value,
346+
}
293347
return data
294348

295349

296350
def parse_rtp(rtp):
297351
date = rtp.get("DA")
298352
time = rtp.get("TI")
299-
return_datetime_utc = make_datetime_json_serializable(date, time)
353+
activity_datetime_utc = make_datetime(date, time)
354+
return_datetime_utc = serialize_datetime(activity_datetime_utc)
300355

301356
value = {
302357
"returnDatetimeUtc": return_datetime_utc,
@@ -310,6 +365,10 @@ def parse_rtp(rtp):
310365
gear = [parse_gea(gea) for gea in children["GEA"]]
311366
value["gearOnboard"] = gear
312367

313-
data = {"log_type": "RTP", "value": value}
368+
data = {
369+
"activity_datetime_utc": activity_datetime_utc,
370+
"log_type": "RTP",
371+
"value": value,
372+
}
314373

315374
return data

datascience/src/pipeline/parsers/flux/flux.py

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from xml.etree.ElementTree import ParseError
1010

1111
import pandas as pd
12+
from dateutil.parser import parse
1213

1314
from src.pipeline.parsers.flux.log_parsers import (
1415
null_parser,
@@ -68,7 +69,6 @@ def get_fishing_activity_type(fishing_activity: ET.Element) -> FluxFishingActivi
6869

6970

7071
def get_fa_report_type(fa_report_document: ET.Element) -> FluxFAReportDocumentType:
71-
7272
report_type = get_text(
7373
fa_report_document, './/ram:TypeCode[@listID="FLUX_FA_REPORT_TYPE"]'
7474
)
@@ -169,7 +169,6 @@ def get_operation_type(xml_element):
169169

170170

171171
def parse_metadata(fa_report_document: xml.etree.ElementTree.Element):
172-
173172
metadata = {
174173
"operation_type": get_operation_type(fa_report_document),
175174
"report_id": get_text(fa_report_document, './/ram:ID[@schemeID="UUID"]'),
@@ -208,15 +207,18 @@ def parse_fa_report_document(fa_report_document: ET.Element):
208207

209208
children = tagged_children(fa_report_document)
210209

210+
activity_datetimes_utc = []
211211
if "SpecifiedFishingActivity" in children:
212212
log_types = set()
213213
values = []
214214
for specified_fishing_activity in children["SpecifiedFishingActivity"]:
215-
log_type, value = parse_specified_fishing_activity(
215+
log_type, activity_datetime_utc, value = parse_specified_fishing_activity(
216216
specified_fishing_activity, report_type
217217
)
218218
log_types.add(log_type)
219219
values.append(value)
220+
if isinstance(activity_datetime_utc, datetime):
221+
activity_datetimes_utc.append(activity_datetime_utc)
220222
try:
221223
assert len(log_types) == 1
222224
except AssertionError:
@@ -235,7 +237,16 @@ def parse_fa_report_document(fa_report_document: ET.Element):
235237
else:
236238
data = dict()
237239

238-
fa_report_document_data = {**metadata, **data}
240+
if activity_datetimes_utc:
241+
activity_datetime_utc = min(activity_datetimes_utc)
242+
else:
243+
activity_datetime_utc = None
244+
245+
fa_report_document_data = {
246+
"activity_datetime_utc": activity_datetime_utc,
247+
**metadata,
248+
**data,
249+
}
239250

240251
return fa_report_document_data
241252

@@ -274,7 +285,28 @@ def parse_specified_fishing_activity(
274285
f"Could not find appropriate parser for log type {log_type}: ", e
275286
)
276287
value = parser(fishing_activity)
277-
return log_type, value
288+
289+
datetime_string = get_text(
290+
fishing_activity, ".//ram:OccurrenceDateTime/udt:DateTime"
291+
)
292+
if not datetime_string:
293+
datetime_string = get_text(
294+
fishing_activity,
295+
"./ram:SpecifiedDelimitedPeriod/ram:EndDateTime/udt:DateTime",
296+
)
297+
298+
if datetime_string:
299+
try:
300+
activity_datetime_utc = parse(datetime_string).replace(tzinfo=None)
301+
except Exception as e:
302+
logging.error(
303+
f"Cound not parse datetime string {datetime_string} with error: {e}"
304+
)
305+
activity_datetime_utc = None
306+
else:
307+
activity_datetime_utc = None
308+
309+
return log_type, activity_datetime_utc, value
278310

279311

280312
def get_list_fa_report_documents(fa_report_message: ET.Element) -> List[ET.Element]:

0 commit comments

Comments
 (0)