13
13
)
14
14
15
15
with CleanAirflowImport ():
16
- from airflow import models
17
16
from airflow .configuration import conf
18
- from airflow .utils . db import merge_conn
17
+ from airflow .exceptions import AirflowConfigException
19
18
from airflow .utils .dag_processing import list_py_file_paths
20
19
from cwl_airflow .utilities .cwl import overwrite_deprecated_dag
21
20
22
21
23
22
def run_init_config (args ):
24
23
"""
25
24
Runs sequence of steps required to configure CWL-Airflow
26
- for the first time. Safe to run several times
25
+ for the first time. Safe to run several times. Upgrades
26
+ config to correspond to Airflow 2.0.0
27
27
"""
28
28
29
+ create_airflow_config (args ) # will create default airflow.cfg if it wasn't present
30
+ patch_airflow_config (args )
29
31
init_airflow_db (args )
30
- patch_airflow_config (args .config )
31
- # add_connections(args)
32
+
32
33
if args .upgrade :
33
- upgrade_dags (args .config )
34
- copy_dags (args .home )
34
+ upgrade_dags (args )
35
+ copy_dags (args )
36
+
37
+
38
def create_airflow_config(args):
    """
    Runs "airflow --help" with AIRFLOW_HOME and AIRFLOW_CONFIG
    environment variables set from args, just to make Airflow
    generate the airflow.cfg file if it doesn't exist yet.

    Exits the process with code 1 if the airflow executable is
    missing or fails to run.
    """

    custom_env = os.environ.copy()
    custom_env["AIRFLOW_HOME"] = args.home
    custom_env["AIRFLOW_CONFIG"] = args.config
    try:
        # any harmless CLI invocation triggers config file creation
        run(
            ["airflow", "--help"],
            env=custom_env,
            check=True,
            stdout=DEVNULL,
            stderr=DEVNULL
        )
    except (FileNotFoundError, CalledProcessError) as err:
        # fixed stray apostrophe in the original message ("executable'.")
        logging.error(f"""Failed to find or to run airflow executable. Exiting.\n{err}""")
        sys.exit(1)
35
58
36
59
37
60
def init_airflow_db(args):
    """
    Sets AIRFLOW_HOME and AIRFLOW_CONFIG from args.
    Calls "airflow db init" from subprocess to make sure
    that the only two things we should care about
    are AIRFLOW_HOME and AIRFLOW_CONFIG.

    Exits the process with code 1 on failure; distinguishes a
    missing airflow executable from a failed database init.
    """

    # NOTE(review): env setup reconstructed from the diff hunk gap;
    # mirrors create_airflow_config — confirm against full source
    custom_env = os.environ.copy()
    custom_env["AIRFLOW_HOME"] = args.home
    custom_env["AIRFLOW_CONFIG"] = args.config
    try:
        run(
            # `db init` always runs `db upgrade` internally,
            # so it's ok to run only `db init`
            ["airflow", "db", "init"],
            env=custom_env,
            check=True,
            stdout=DEVNULL,
            stderr=DEVNULL
        )
    except FileNotFoundError as err:
        # fixed stray apostrophe in the original message ("executable'.")
        logging.error(f"""Failed to find airflow executable. Exiting.\n{err}""")
        sys.exit(1)
    except CalledProcessError as err:
        logging.error(f"""Failed to run 'airflow db init'. Delete airflow.db if SQLite was used. Exiting.\n{err}""")
        sys.exit(1)
59
85
60
86
61
def patch_airflow_config(args):
    """
    Updates current Airflow configuration file to include defaults for cwl-airflow.
    If something went wrong, restores the original airflow.cfg from the backed up copy.
    If update to Airflow 2.0.0 is required, generates new airflow.cfg with some of the
    important parameters copied from the old airflow.cfg. Backed up copy is not deleted
    in this case.

    Exits the process with code 1 if any patch fails to apply.
    """

    # TODO: add cwl section with the following parameters:
    # - singularity
    # - use_container

    # CWL-Airflow specific settings
    patches = [
        ["sed", "-i", "-e", "s#^dags_are_paused_at_creation.*#dags_are_paused_at_creation = False#g", args.config],
        ["sed", "-i", "-e", "s#^load_examples.*#load_examples = False#g", args.config],
        ["sed", "-i", "-e", "s#^load_default_connections.*#load_default_connections = False#g", args.config],
        ["sed", "-i", "-e", "s#^logging_config_class.*#logging_config_class = cwl_airflow.config_templates.airflow_local_settings.DEFAULT_LOGGING_CONFIG#g", args.config],
        ["sed", "-i", "-e", "s#^hide_paused_dags_by_default.*#hide_paused_dags_by_default = True#g", args.config]
    ]

    # Minimum amount of settings that should be enough for starting
    # SequentialExecutor, LocalExecutor or CeleryExecutor with
    # the same dags and metadata database after updating to Airflow 2.0.0.
    # All other user specific settings should be manually updated from the
    # backuped airflow.cfg as a lot of them have been refactored.
    transferable_settings = [
        ("core", "dags_folder"),
        ("core", "default_timezone"),
        ("core", "executor"),
        ("core", "sql_alchemy_conn"),
        ("core", "sql_engine_encoding"),  # just in case
        ("core", "fernet_key"),           # to be able to read from the old database
        ("celery", "broker_url"),
        ("celery", "result_backend")
    ]

    # create a temporary backup of airflow.cfg to restore from if we failed to apply patches
    # this backup will be deleted after all patches applied if it wasn't created right before
    # Airflow version update to 2.0.0
    airflow_config_backup = args.config + "_backup_" + str(uuid.uuid4())

    # BUGFIX: must be bound before "try" — if an exception fires before the
    # assignment below (e.g. shutil.copyfile raising FileNotFoundError), the
    # "finally" block would otherwise hit a NameError that masks sys.exit(1)
    airflow_version_update = False
    try:
        # reading airflow.cfg before applying any patches and creating backup
        conf.read(args.config)
        shutil.copyfile(args.config, airflow_config_backup)

        # check if we need to make airflow.cfg correspond to the Airflow 2.0.0
        # we search for [logging] section as it's present only Airflow >= 2.0.0
        airflow_version_update = not conf.has_section("logging")
        if airflow_version_update:
            logging.info("Airflow config will be upgraded to correspond to Airflow 2.0.0")
            for section, key in transferable_settings:
                try:
                    patches.append(
                        ["sed", "-i", "-e", f"s#^{key}.*#{key} = {conf.get(section, key)}#g", args.config]
                    )
                except AirflowConfigException:  # just skip missing in the config section/key
                    pass
            os.remove(args.config)       # remove old config
            create_airflow_config(args)  # create new airflow.cfg with the default values

        # Apply all patches
        for patch in patches:
            logging.debug(f"Applying patch {patch}")
            # NOTE(review): run() kwargs after shell=False reconstructed from the
            # diff hunk gap; mirrors the identical call in init_airflow_db — confirm
            run(
                patch,
                shell=False,  # for proper handling of filenames with spaces
                check=True,
                stdout=DEVNULL,
                stderr=DEVNULL
            )
    except (CalledProcessError, FileNotFoundError) as err:
        logging.error(f"""Failed to patch Airflow configuration file. Restoring from the backup and exiting.\n{err}""")
        if os.path.isfile(airflow_config_backup):
            shutil.copyfile(airflow_config_backup, args.config)
        sys.exit(1)
    finally:
        # keep the backup around when we just upgraded the config to 2.0.0
        if os.path.isfile(airflow_config_backup) and not airflow_version_update:
            os.remove(airflow_config_backup)
97
167
98
168
99
- def upgrade_dags (airflow_config ):
169
+ def upgrade_dags (args ):
100
170
"""
101
171
Corrects old style DAG python files into the new format.
102
- Reads configuration from "airflow_config ". Uses standard
172
+ Reads configuration from "args.config ". Uses standard
103
173
"conf.get" instead of "conf_get", because the fields we
104
174
use are always set. Copies all deprecated dags into the
105
175
"deprecated_dags" folder, adds deprecated DAGs to the
@@ -109,7 +179,7 @@ def upgrade_dags(airflow_config):
109
179
files remain unchanged.
110
180
"""
111
181
112
- conf .read (airflow_config )
182
+ conf .read (args . config ) # this will read already patched airflow.cfg
113
183
dags_folder = conf .get ("core" , "dags_folder" )
114
184
for dag_location in list_py_file_paths ( # will skip all DAGs from ".airflowignore"
115
185
directory = dags_folder ,
@@ -125,10 +195,10 @@ def upgrade_dags(airflow_config):
125
195
)
126
196
127
197
128
- def copy_dags (airflow_home , source_folder = None ):
198
+ def copy_dags (args , source_folder = None ):
129
199
"""
130
200
Copies *.py files (dags) from source_folder (default ../../extensions/dags)
131
- to dags_folder, which is always {airflow_home }/dags. Overwrites existent
201
+ to dags_folder, which is always {args.home }/dags. Overwrites existent
132
202
files
133
203
"""
134
204
@@ -142,42 +212,9 @@ def copy_dags(airflow_home, source_folder=None):
142
212
"extensions/dags" ,
143
213
)
144
214
145
- target_folder = get_dir (os .path .join (airflow_home , "dags" ))
215
+ target_folder = get_dir (os .path .join (args . home , "dags" ))
146
216
for root , dirs , files in os .walk (source_folder ):
147
217
for filename in files :
148
218
if re .match (".*\\ .py$" , filename ) and filename != "__init__.py" :
149
219
# if not os.path.isfile(os.path.join(target_folder, filename)):
150
220
shutil .copy (os .path .join (root , filename ), target_folder )
151
-
152
-
153
- # not used anymore
154
- def add_connections (args ):
155
- """
156
- Sets AIRFLOW_HOME and AIRFLOW_CONFIG from args.
157
- Call 'airflow connections --add' from subproces to make sure that
158
- the only two things we should care about are AIRFLOW_HOME and
159
- AIRFLOW_CONFIG. Adds "process_report" connections to the Airflow DB
160
- that is used to report workflow execution progress and results.
161
- """
162
-
163
- custom_env = os .environ .copy ()
164
- custom_env ["AIRFLOW_HOME" ] = args .home
165
- custom_env ["AIRFLOW_CONFIG" ] = args .config
166
- try :
167
- run (
168
- [
169
- "airflow" , "connections" , "--add" ,
170
- "--conn_id" , "process_report" ,
171
- "--conn_type" , "http" ,
172
- "--conn_host" , "localhost" ,
173
- "--conn_port" , "3070" ,
174
- "--conn_extra" , "{\" endpoint\" :\" /airflow/\" }"
175
- ],
176
- env = custom_env ,
177
- check = True ,
178
- stdout = DEVNULL ,
179
- stderr = DEVNULL
180
- )
181
- except (CalledProcessError , FileNotFoundError ) as err :
182
- logging .error (f"""Failed to run 'airflow connections --add'. Exiting.\n { err } """ )
183
- sys .exit (1 )
0 commit comments