-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6389354
commit 3d047db
Showing
8 changed files
with
241 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
20 changes: 20 additions & 0 deletions
20
datascience/src/pipeline/data/non_commited_data/dummy_pno_ports_subscriptions.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
control_unit_id,port_locode,receive_all_pnos | ||
10052,FRALM,false | ||
10052,FRPOV,false | ||
10052,FRSRL,false | ||
10052,FRCR2,false | ||
10952,FRLCT,false | ||
10952,FRPDB,true | ||
10952,FRPSL,true | ||
10952,MQKF4,true | ||
10952,FRAMA,true | ||
10952,FRCR2,true | ||
10952,FRETB,true | ||
10052,FRLCT,true | ||
15652,FRMRS,true | ||
15659,FRMTU,false | ||
15659,FRPDB,false | ||
15659,FRPSL,false | ||
15659,FRRYR,false | ||
15659,FRXSR,false | ||
15657,YTDZA,false |
8 changes: 8 additions & 0 deletions
8
datascience/src/pipeline/data/non_commited_data/dummy_pno_segments_subscriptions.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
control_unit_id,segment | ||
10052,seg_1 | ||
10052,seg_2 | ||
15652,seg_1 | ||
15659,seg_5 | ||
15659,seg_4 | ||
15659,seg_9 | ||
15657,seg_1 |
6 changes: 6 additions & 0 deletions
6
datascience/src/pipeline/data/non_commited_data/dummy_pno_vessels_subscriptions.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
control_unit_id,cfr | ||
1,ABCDEFGHIJ1 | ||
1,ABCDEFGHIJ2 | ||
2,ABCDEFGHIJ3 | ||
5,ABCDEFGHIJ1 | ||
1,ABCDEFGHIJ7 |
102 changes: 102 additions & 0 deletions
102
datascience/src/pipeline/flows/init_pno_subscriptions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
from pathlib import Path | ||
|
||
import pandas as pd | ||
import prefect | ||
from prefect import Flow, Parameter, case, task | ||
from prefect.executors import LocalDaskExecutor | ||
|
||
from config import NON_COMMITED_DATA_LOCATION | ||
from src.pipeline.generic_tasks import load | ||
from src.pipeline.shared_tasks.control_flow import check_flow_not_running | ||
|
||
|
||
@task(checkpoint=False)
def extract_pno_ports_subscriptions(filename: str) -> pd.DataFrame:
    """Read the PNO ports subscriptions CSV into a DataFrame.

    Args:
        filename: Name of the CSV file, resolved relative to
            ``NON_COMMITED_DATA_LOCATION``.

    Returns:
        The parsed file content, with the ``receive_all_pnos`` column
        read as ``bool``.
    """
    csv_path = NON_COMMITED_DATA_LOCATION / filename
    ports_subscriptions = pd.read_csv(
        csv_path,
        dtype={"receive_all_pnos": bool},
        encoding="utf8",
    )
    return ports_subscriptions
|
||
|
||
@task(checkpoint=False)
def extract_pno_segments_subscriptions(filename: str) -> pd.DataFrame:
    """Read the PNO segments subscriptions CSV into a DataFrame.

    Args:
        filename: Name of the CSV file, resolved relative to
            ``NON_COMMITED_DATA_LOCATION``.

    Returns:
        The parsed file content, with pandas' default dtype inference.
    """
    csv_path = NON_COMMITED_DATA_LOCATION / filename
    segments_subscriptions = pd.read_csv(csv_path, encoding="utf8")
    return segments_subscriptions
|
||
|
||
@task(checkpoint=False)
def extract_pno_vessels_subscriptions(filename: str) -> pd.DataFrame:
    """Read the PNO vessels subscriptions CSV into a DataFrame.

    Args:
        filename: Name of the CSV file, resolved relative to
            ``NON_COMMITED_DATA_LOCATION``.

    Returns:
        The parsed file content, with pandas' default dtype inference.
    """
    csv_path = NON_COMMITED_DATA_LOCATION / filename
    vessels_subscriptions = pd.read_csv(csv_path, encoding="utf8")
    return vessels_subscriptions
|
||
|
||
@task(checkpoint=False)
def load_pno_ports_subscriptions(pno_ports_subscriptions: pd.DataFrame):
    """Load ports subscriptions into ``public.pno_ports_subscriptions``.

    Args:
        pno_ports_subscriptions: Rows to write to the ``monitorfish_remote``
            database.

    The data is handed to the project's generic ``load`` helper with
    ``how="replace"``.
    NOTE(review): presumably this replaces the existing table content —
    confirm against ``src.pipeline.generic_tasks.load``.
    """
    load(
        pno_ports_subscriptions,
        db_name="monitorfish_remote",
        schema="public",
        table_name="pno_ports_subscriptions",
        how="replace",
        logger=prefect.context.get("logger"),
    )
|
||
|
||
@task(checkpoint=False)
def load_pno_segments_subscriptions(pno_segments_subscriptions: pd.DataFrame):
    """Load segments subscriptions into ``public.pno_segments_subscriptions``.

    Args:
        pno_segments_subscriptions: Rows to write to the
            ``monitorfish_remote`` database.

    The data is handed to the project's generic ``load`` helper with
    ``how="replace"``.
    NOTE(review): presumably this replaces the existing table content —
    confirm against ``src.pipeline.generic_tasks.load``.
    """
    load(
        pno_segments_subscriptions,
        db_name="monitorfish_remote",
        schema="public",
        table_name="pno_segments_subscriptions",
        how="replace",
        logger=prefect.context.get("logger"),
    )
|
||
|
||
@task(checkpoint=False)
def load_pno_vessels_subscriptions(pno_vessels_subscriptions: pd.DataFrame):
    """Load vessels subscriptions into ``public.pno_vessels_subscriptions``.

    Args:
        pno_vessels_subscriptions: Rows to write to the
            ``monitorfish_remote`` database.

    The data is handed to the project's generic ``load`` helper with
    ``how="replace"``.
    NOTE(review): presumably this replaces the existing table content —
    confirm against ``src.pipeline.generic_tasks.load``.
    """
    load(
        pno_vessels_subscriptions,
        db_name="monitorfish_remote",
        schema="public",
        table_name="pno_vessels_subscriptions",
        how="replace",
        logger=prefect.context.get("logger"),
    )
|
||
|
||
# Flow wiring: for each of the three subscription datasets (ports, segments,
# vessels), take a file name parameter, extract the CSV and load it into the
# database. Everything is guarded by the "flow not already running" check.
with Flow("Init pno subscriptions", executor=LocalDaskExecutor()) as flow:
    flow_not_running = check_flow_not_running()
    with case(flow_not_running, True):
        # Ports subscriptions
        ports_file_name = Parameter("pno_ports_subscriptions_file_name")
        ports_subscriptions = extract_pno_ports_subscriptions(ports_file_name)
        load_pno_ports_subscriptions(ports_subscriptions)

        # Segments subscriptions
        segments_file_name = Parameter("pno_segments_subscriptions_file_name")
        segments_subscriptions = extract_pno_segments_subscriptions(
            segments_file_name
        )
        load_pno_segments_subscriptions(segments_subscriptions)

        # Vessels subscriptions
        vessels_file_name = Parameter("pno_vessels_subscriptions_file_name")
        vessels_subscriptions = extract_pno_vessels_subscriptions(
            vessels_file_name
        )
        load_pno_vessels_subscriptions(vessels_subscriptions)

# Record the name of the module that defines the flow on the flow object.
flow.file_name = Path(__file__).name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
84 changes: 84 additions & 0 deletions
84
datascience/tests/test_pipeline/test_flows/test_init_pno_subscriptions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
import pandas as pd | ||
|
||
from src.pipeline.flows.init_pno_subscriptions import flow | ||
from src.read_query import read_query | ||
from tests.mocks import mock_check_flow_not_running | ||
|
||
# Swap the real "flow not running" check for its test mock so the flow can be
# run from the test suite.
_check_flow_not_running_task = flow.get_tasks("check_flow_not_running")[0]
flow.replace(_check_flow_not_running_task, mock_check_flow_not_running)
|
||
|
||
def test_flow(reset_test_data):
    """Run the init flow twice and check the three subscription tables.

    The tables must be empty before the first run, contain the dummy CSV
    rows after it (19 ports, 7 segments, 5 vessels), and be unchanged by a
    second run with the same parameters.
    """
    db_name = "monitorfish_remote"
    queries = {
        "ports": (
            "SELECT * FROM pno_ports_subscriptions "
            "ORDER BY control_unit_id, port_locode"
        ),
        "segments": (
            "SELECT * FROM pno_segments_subscriptions "
            "ORDER BY control_unit_id, segment"
        ),
        "vessels": (
            "SELECT * FROM pno_vessels_subscriptions "
            "ORDER BY control_unit_id, cfr"
        ),
    }
    expected_row_counts = {"ports": 19, "segments": 7, "vessels": 5}
    run_parameters = {
        "pno_ports_subscriptions_file_name": "dummy_pno_ports_subscriptions.csv",
        "pno_segments_subscriptions_file_name": (
            "dummy_pno_segments_subscriptions.csv"
        ),
        "pno_vessels_subscriptions_file_name": (
            "dummy_pno_vessels_subscriptions.csv"
        ),
    }

    def read_tables():
        # Snapshot the three tables, in ports / segments / vessels order.
        return {
            table: read_query(query, db=db_name)
            for table, query in queries.items()
        }

    initial = read_tables()

    flow.schedule = None
    state = flow.run(**run_parameters)
    assert state.is_successful()

    first_run = read_tables()

    for table in queries:
        assert len(initial[table]) == 0

    for table, expected_rows in expected_row_counts.items():
        assert len(first_run[table]) == expected_rows

    # Re-running should succeed and lead to the same pno subscriptions
    state = flow.run(**run_parameters)
    assert state.is_successful()

    second_run = read_tables()

    for table in queries:
        pd.testing.assert_frame_equal(first_run[table], second_run[table])