From 45f0c8b79d8f8ca2d31fc7ddd4e1255dd7cebda3 Mon Sep 17 00:00:00 2001 From: Vincent Gromakowski Date: Mon, 5 Aug 2024 11:27:08 +0200 Subject: [PATCH] change source bucket for the Spark data lake example (#706) --- examples/spark-data-lake/infra/stacks/application_stack.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/spark-data-lake/infra/stacks/application_stack.py b/examples/spark-data-lake/infra/stacks/application_stack.py index 7735e3ac7..a5829e019 100644 --- a/examples/spark-data-lake/infra/stacks/application_stack.py +++ b/examples/spark-data-lake/infra/stacks/application_stack.py @@ -37,14 +37,14 @@ def __init__( ) # Import the S3 bucket used as the Spark job source - source_bucket = s3.Bucket.from_bucket_name(self, 'SourceBucket', 'nyc-tlc') + source_bucket = s3.Bucket.from_bucket_name(self, 'SourceBucket', 'redshift-demos') # Copy the Yellow taxi data in the silver bucket of the data lake dsf.utils.S3DataCopy( self, "YellowDataCopy", source_bucket=source_bucket, - source_bucket_prefix="trip data/yellow_tripdata_2019", + source_bucket_prefix="data/NY-Pub/year=2016/month=1/type=yellow", source_bucket_region="us-east-1", target_bucket= storage.silver_bucket, target_bucket_prefix="yellow-trip-data/", @@ -56,7 +56,7 @@ def __init__( self, "GreenDataCopy", source_bucket=source_bucket, - source_bucket_prefix="trip data/green_tripdata_2019", + source_bucket_prefix="data/NY-Pub/year=2016/month=1/type=green", source_bucket_region="us-east-1", target_bucket= storage.silver_bucket, target_bucket_prefix="green-trip-data/",