-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserverless.yml
200 lines (186 loc) · 7.33 KB
/
serverless.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# Welcome to Serverless!
#
# This file is the main config file for your service.
# It's very minimal at this point and uses default values.
# You can always add more config options for more control.
# We've included some commented out config examples here.
# Just uncomment any of them to get that config option.
#
# For full config options, check the docs:
# docs.serverless.com
#
# Happy Coding!
# The service name is assembled from the product name and the deployment stage.
service: "${self:custom.product}-${self:provider.stage}"
# useDotenv: true
# `app` and `org` are only needed together with dashboard.serverless.com.
# app: your-app-name
# org: your-org-name
# Pin the Serverless Framework major version this service may be deployed with.
# See the Serverless docs for details.
frameworkVersion: "3"
custom:
  # Name fragments combined into the resource names used further down.
  product: serverless
  bucket: s3
  database: gluedb
  crawler: gluecrwlr
  workgroup: athenawg
  query: athenaqry
  # Unique name of your S3 bucket.
  DataS3BucketName: data-athena-016436653652
  # Data directory inside the S3 bucket (scanned by the Glue crawler).
  DataS3BucketPath: "s3://data-athena-016436653652/raw_data/"
  # Directory inside the S3 bucket that receives Athena query output.
  OutputS3BucketPath: "s3://data-athena-016436653652/athena-output/"
# you can overwrite defaults here
provider:
  name: aws
  # Deployment stage (development, staging, testing, production).
  # Taken from the `stage` CLI option, defaulting to 'staging'.
  stage: "${opt:stage, 'staging'}"
  # Target region, taken from the `region` CLI option; change the default as required.
  region: "${opt:region, 'us-east-1'}"
  # AWS credentials profile, taken from the `aws-profile` CLI option.
  profile: "${opt:aws-profile, 'ServerlessUser'}"
  # Tag the stack with the stage it belongs to.
  stackTags:
    Env: "${self:provider.stage}"
# The resources section declares the CloudFormation resources (and outputs) deployed with this service.
# First, create the IAM role assumed by the Glue crawler. For demonstration purposes this role is granted all related permissions.
resources:
Resources:
AWSAthenaGlueRole:
  Type: 'AWS::IAM::Role'
  Properties:
    Path: '/'
    # Trust policy: the Glue service is the only principal allowed to assume this role.
    AssumeRolePolicyDocument:
      Version: '2012-10-17'
      Statement:
        - Effect: 'Allow'
          Principal:
            Service: ['glue.amazonaws.com']
          Action: ['sts:AssumeRole']
    Policies:
      # Inline policy 1: every Athena action on every resource.
      - PolicyName: AWSAthenaAccess
        PolicyDocument:
          Statement:
            - Effect: Allow
              Action: 'athena:*'
              Resource: '*'
      # Inline policy 2: Glue, S3 and CloudWatch Logs access.
      - PolicyName: GlueS3Access
        PolicyDocument:
          Statement:
            # All Glue actions plus read-only inspection of IAM roles.
            - Effect: Allow
              Action:
                - 'glue:*'
                - 'iam:ListRolePolicies'
                - 'iam:GetRole'
                - 'iam:GetRolePolicy'
              Resource: '*'
            # All S3 and S3 Object Lambda actions on every resource.
            - Effect: Allow
              Action:
                - 's3:*'
                - 's3-object-lambda:*'
              Resource: '*'
            # All CloudWatch Logs actions on every resource.
            - Effect: Allow
              Action:
                - 'logs:*'
              Resource: '*'
            # Object read/write on aws-glue-* buckets and prefixes.
            # The wildcard S3 statement above already grants these actions.
            - Effect: Allow
              Action:
                - 's3:GetObject'
                - 's3:PutObject'
              Resource:
                - 'arn:aws:s3:::aws-glue-*/*'
                - 'arn:aws:s3:::*/*aws-glue-*/*'
                - 'arn:aws:s3:::aws-glue-*'
            # Read log events from /aws-glue/* log groups.
            # The wildcard logs statement above already grants this action.
            - Effect: Allow
              Action:
                - 'logs:GetLogEvents'
              Resource:
                - 'arn:aws:logs:*:*:/aws-glue/*'
# Create a Glue database to contain the tables created by the crawler.
AWSGlueDatabase:
  Type: AWS::Glue::Database
  Properties:
    # The catalog is identified by the ID of the deploying AWS account.
    CatalogId: !Ref AWS::AccountId
    DatabaseInput:
      # The stage is read from provider.stage: the `custom` section of this
      # file defines no `stage` key, so ${self:custom.stage} cannot resolve.
      Name: database-${self:custom.product}-${self:custom.database}-${self:provider.stage}
      Description: database-${self:custom.product}-${self:custom.database}-${self:provider.stage}
# Create a Glue crawler that crawls the raw data in the S3 bucket.
AWSGlueCrawler:
  Type: AWS::Glue::Crawler
  DependsOn:
    - AWSAthenaGlueRole
    - AWSGlueDatabase
  Properties:
    # The stage is read from provider.stage (as TablePrefix below does): the
    # `custom` section of this file defines no `stage` key.
    Name: crawler-${self:custom.product}-${self:custom.crawler}-${self:provider.stage}
    Description: crawler-${self:custom.product}-${self:custom.crawler}-${self:provider.stage}
    Role:
      Fn::GetAtt: [AWSAthenaGlueRole, Arn]
    DatabaseName: !Ref AWSGlueDatabase
    # Prefix prepended to the name of every table the crawler creates.
    TablePrefix: table-${self:custom.product}-${self:custom.crawler}-${self:provider.stage}
    Schedule:
      # Run the crawler every 6 hours (at hours 0, 6, 12 and 18), Monday to Friday.
      ScheduleExpression: 'cron(0 0/6 ? * MON-FRI *)'
    Targets:
      S3Targets:
        # Raw-data directory of the S3 bucket.
        - Path: ${self:custom.DataS3BucketPath}
          # Skip audio, archive and text files.
          Exclusions:
            - '**.wav'
            - '**.webm'
            - '**.zip'
            - '**.opus'
            - '**.txt'
    SchemaChangePolicy:
      UpdateBehavior: UPDATE_IN_DATABASE
      DeleteBehavior: LOG
    # JSON crawler configuration: partitions inherit from the table, and new
    # columns are merged into existing tables.
    Configuration: '{"Version":1.0,"CrawlerOutput":{"Partitions":{"AddOrUpdateBehavior":"InheritFromTable"},"Tables":{"AddOrUpdateBehavior":"MergeNewColumns"}}}'
# CloudWatch Logs group for the Glue crawler.
# NOTE(review): the name is fixed, so stack creation presumably fails when a
# log group with this name already exists in the account/region — confirm.
AWSGlueCrawlerLogGroup:
  Type: 'AWS::Logs::LogGroup'
  Properties:
    LogGroupName: '/aws-glue/crawlers'
    # Retention is left unset; uncomment to set it to one day.
    # RetentionInDays: 1
# Create the Athena workgroup used to run queries.
AWSAthenaWorkGroup:
  Type: AWS::Athena::WorkGroup
  Properties:
    # The stage is read from provider.stage: the `custom` section of this
    # file defines no `stage` key, so ${self:custom.stage} cannot resolve.
    Name: workgroup-${self:custom.product}-${self:custom.workgroup}-${self:provider.stage}
    Description: workgroup-${self:custom.product}-${self:custom.workgroup}-${self:provider.stage}
    State: ENABLED
    RecursiveDeleteOption: true
    WorkGroupConfiguration:
      EnforceWorkGroupConfiguration: true
      PublishCloudWatchMetricsEnabled: true
      # RequesterPaysEnabled: false
      ResultConfiguration:
        # Directory of the S3 bucket that receives query output.
        OutputLocation: ${self:custom.OutputS3BucketPath}
        EncryptionConfiguration:
          EncryptionOption: SSE_S3
# Create a sample Athena query. The query names the Glue database and the
# crawler-created table (table prefix + S3 data directory name).
SampleQueryDate:
  Type: AWS::Athena::NamedQuery
  DependsOn:
    - AWSAthenaWorkGroup
    - AWSGlueCrawler
    - AWSGlueDatabase
  Properties:
    # The stage is read from provider.stage: the `custom` section of this
    # file defines no `stage` key, so ${self:custom.stage} cannot resolve.
    Name: query-${self:custom.product}-${self:custom.query}-${self:provider.stage}
    Description: query-${self:custom.product}-${self:custom.query}-${self:provider.stage}
    Database: !Ref AWSGlueDatabase
    WorkGroup: !Ref AWSAthenaWorkGroup
    # The database identifier repeats the Name of AWSGlueDatabase and the table
    # identifier repeats the TablePrefix of AWSGlueCrawler followed by the
    # `raw_data` directory name, so the query targets objects this stack creates.
    # NOTE(review): Glue crawlers may normalise characters such as '-' in the
    # table names they generate — confirm the real table name after the first crawl.
    QueryString: >
      SELECT * FROM "database-${self:custom.product}-${self:custom.database}-${self:provider.stage}"."table-${self:custom.product}-${self:custom.crawler}-${self:provider.stage}raw_data"
      ORDER BY startdate DESC
# Output values shown after completion of the CloudFormation stack.
# Each value is also exported under a fixed name.
Outputs:
  AWSAthenaGlueRole:
    Value:
      Ref: AWSAthenaGlueRole
    Export:
      Name: 'AWSAthenaGlueRole-Name'
  AWSGlueDatabase:
    Value:
      Ref: AWSGlueDatabase
    Export:
      Name: 'AWSGlueDatabase-name'
  AWSGlueCrawler:
    Value:
      Ref: AWSGlueCrawler
    Export:
      Name: 'AWSGlueCrawler-name'
  SampleQueryDate:
    Value:
      Ref: SampleQueryDate
    Export:
      Name: 'SampleQueryDate-name'