3
3
from airflow .decorators import task , task_group
4
4
from airflow .providers .google .cloud .operators .gcs import GCSListObjectsOperator
5
5
from ot_orchestration .types import Manifest_Object
6
- from ot_orchestration .utils import GCSIOManager , get_step_params , get_full_config
6
+ from ot_orchestration .utils import IOManager , get_step_params , get_full_config
7
7
from airflow .models .baseoperator import chain
8
8
from ot_orchestration .utils .manifest import extract_study_id_from_path
9
9
from airflow .utils .edgemodifier import Label
@@ -57,6 +57,8 @@ def get_new_sumstat_paths(
57
57
def collect_sumstats_and_generate_new_manifests (
58
58
ti : TaskInstance | None = None ,
59
59
) -> list [Manifest_Object ]:
60
+ if ti is None :
61
+ raise ValueError ("Task instance is None" )
60
62
task_id : str = ti .xcom_pull (task_ids = "manifest_preparation.get_execution_mode" )
61
63
logging .info ("TASK ID: %s" , task_id )
62
64
new_sumstats = ti .xcom_pull (task_ids = task_id )
@@ -97,6 +99,8 @@ def amend_curation_metadata(new_manifests: list[Manifest_Object]):
97
99
params = get_step_params ("manifest_preparation" )
98
100
logging .info ("USING FOLLOWING PARAMS: %s" , params )
99
101
curation_path = params ["manual_curation_manifest" ]
102
+ if not isinstance (curation_path , str ):
103
+ raise ValueError ("Curation path is not a string" )
100
104
logging .info ("CURATING MANIFESTS WITH: %s" , curation_path )
101
105
curation_df = pd .read_csv (curation_path , sep = "\t " ).drop (
102
106
columns = ["publicationTitle" , "traitFromSource" , "qualityControl" ]
@@ -116,18 +120,20 @@ def amend_curation_metadata(new_manifests: list[Manifest_Object]):
116
120
def read_manifests(manifest_paths: list[str]) -> list[Manifest_Object]:
    """Read manifests.

    Each incoming path is prefixed with the ``gs://`` scheme and the resulting
    URIs are loaded in one call through ``IOManager().load_many``.

    Args:
        manifest_paths (list[str]): Paths without the ``gs://`` scheme
            (presumably ``<bucket>/<object>`` strings - confirm against the
            upstream listing task).

    Returns:
        list[Manifest_Object]: The value returned by ``IOManager().load_many``
            for the prefixed paths.
    """
    # No whitespace may follow the replacement field: anything after the
    # closing brace is literal text and would become part of the URI.
    gcs_uris = [f"gs://{path}" for path in manifest_paths]
    return IOManager().load_many(gcs_uris)
120
124
121
125
122
126
@task(task_id="save_config")
def save_config(task_instance: TaskInstance | None = None) -> str:
    """Save configuration for batch processing.

    Serializes the full configuration and dumps it to
    ``gs://<staging_bucket>/<staging_prefix>/<run_id>/config.yaml``, where the
    bucket and prefix come from the ``manifest_preparation`` step parameters
    and ``run_id`` is read from the task instance.

    Args:
        task_instance (TaskInstance | None): Task instance of the running
            task; it must not be None when the function body executes
            (presumably injected by Airflow at run time - confirm).

    Returns:
        str: The path the configuration was dumped to.

    Raises:
        ValueError: If ``task_instance`` is None.
    """
    if task_instance is None:
        raise ValueError("Task instance is None")
    run_id = task_instance.run_id
    params = get_step_params("manifest_preparation")
    full_config = get_full_config().serialize()
    # Path segments are joined with bare "/" separators; any whitespace between
    # a replacement field and the slash would end up in the object path.
    config_path = f"gs://{params['staging_bucket']}/{params['staging_prefix']}/{run_id}/config.yaml"
    logging.info("DUMPING CONFIG TO THE FOLLOWING PATH: %s", config_path)
    IOManager().resolve(config_path).dump(full_config)
    return config_path
132
138
133
139
@@ -137,6 +143,8 @@ def save_config(task_instance: TaskInstance | None = None) -> str:
137
143
)
138
144
def choose_manifest_paths (ti : TaskInstance | None = None ) -> list [str ]:
139
145
"""Choose manifests to pass to the next."""
146
+ if ti is None :
147
+ raise ValueError ("Task instance is None" )
140
148
task_id : str = ti .xcom_pull (task_ids = "manifest_preparation.get_execution_mode" )
141
149
logging .info ("TASK ID: %s" , task_id )
142
150
if not task_id .endswith ("read_manifests" ):
@@ -150,7 +158,7 @@ def save_manifests(manifests: list[Manifest_Object]) -> list[Manifest_Object]:
150
158
"""Write manifests to persistant storage."""
151
159
manifest_paths = [manifest ["manifestPath" ] for manifest in manifests ]
152
160
logging .info ("MANIFEST PATHS: %s" , manifest_paths )
153
- GCSIOManager ().dump_many (manifests , manifest_paths )
161
+ IOManager ().dump_many (manifests , manifest_paths )
154
162
return manifests
155
163
156
164
@@ -164,7 +172,6 @@ def exit_when_no_new_sumstats(new_sumstats: dict[str, str]) -> bool:
164
172
@task_group (group_id = TASK_GROUP_ID )
165
173
def gwas_catalog_manifest_preparation ():
166
174
"""Prepare initial manifest."""
167
-
168
175
fetch_existing_manifests = GCSListObjectsOperator (
169
176
task_id = "list_existing_manifests" ,
170
177
bucket = "{{ params.steps.manifest_preparation.staging_bucket }}" ,
0 commit comments