File tree (expand/collapse): 2 files changed, +6 −2 lines changed
@@ -66,6 +66,7 @@ def create_cluster(
66
66
num_workers : int = 2 ,
67
67
num_local_ssds : int = 1 ,
68
68
autoscaling_policy : str = GCP_AUTOSCALING_POLICY ,
69
+ master_disk_size : int = 500 ,
69
70
) -> DataprocCreateClusterOperator :
70
71
"""Generate an Airflow task to create a Dataproc cluster. Common parameters are reused, and varying parameters can be specified as needed.
71
72
@@ -76,6 +77,7 @@ def create_cluster(
76
77
num_workers (int): Number of worker nodes. Defaults to 2.
77
78
num_local_ssds (int): How many local SSDs to attach to each worker node, both primary and secondary. Defaults to 1.
78
79
autoscaling_policy (str): Name of the autoscaling policy to use. Defaults to GCP_AUTOSCALING_POLICY.
80
+ master_disk_size (int): Size of the master node's boot disk in GB. Defaults to 500.
79
81
80
82
Returns:
81
83
DataprocCreateClusterOperator: Airflow task to create a Dataproc cluster.
@@ -86,7 +88,7 @@ def create_cluster(
86
88
zone = GCP_ZONE ,
87
89
master_machine_type = master_machine_type ,
88
90
worker_machine_type = worker_machine_type ,
89
- master_disk_size = 2000 ,
91
+ master_disk_size = master_disk_size ,
90
92
worker_disk_size = 500 ,
91
93
num_workers = num_workers ,
92
94
image_version = GCP_DATAPROC_IMAGE ,
Original file line number Diff line number Diff line change 63
63
)
64
64
65
65
(
66
- common .create_cluster (CLUSTER_NAME , autoscaling_policy = AUTOSCALING )
66
+ common .create_cluster (
67
+ CLUSTER_NAME , autoscaling_policy = AUTOSCALING , master_disk_size = 2000
68
+ )
67
69
>> common .install_dependencies (CLUSTER_NAME )
68
70
>> study_and_sumstats
69
71
>> window_based_clumping
You can’t perform that action at this time.
0 commit comments