-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserverless.yml
240 lines (224 loc) · 8.26 KB
/
serverless.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# Welcome to Serverless!
#
# This file is the main config file for your service.
# It's very minimal at this point and uses default values.
# You can always add more config options for more control.
# We've included some commented out config examples here.
# Just uncomment any of them to get that config option.
#
# For full config options, check the docs:
# docs.serverless.com
#
# Happy Coding!
# Service name: the product name from custom.product, suffixed with the stage.
service: "${self:custom.product}-${self:provider.stage}"
# useDotenv: true
# Optional app and org, for use with dashboard.serverless.com.
# app: your-app-name
# org: your-org-name
# Pin the service to a specific Serverless Framework version.
# See the Serverless docs for details.
frameworkVersion: "3"
# Provider defaults; region, stage and profile each read a CLI option first
# and fall back to the quoted default.
provider:
  name: aws
  # Change the default to your region as required.
  region: "${opt:region, 'us-east-1'}"
  # Typical values: development, staging, testing, production.
  stage: "${opt:stage, 'staging'}"
  profile: "${opt:aws-profile, 'ServerlessUser'}"
  # Tag the stack with the stage it was deployed for.
  stackTags:
    Env: "${self:provider.stage}"
# Custom variables shared across this file, referenced as ${self:custom.<key>}.
# They are the building blocks of the resource names.
custom:
  product: serverless
  # Alias of provider.stage, so that ${self:custom.stage} resolves wherever it
  # is referenced. Without this key the variable has no value and Serverless v3
  # aborts with an unresolved-variable error.
  stage: ${self:provider.stage}
  bucket: s3
  database: gluedb
  crawler: gluecrwlr
  workgroup: athenawg
  query: athenaqry
# The resources section declares the CloudFormation resources deployed with this service.
# Create IAM Role assumed by the crawler. For demonstration, this role is given all related permissions.
resources:
Resources:
# Role with three inline policies: Athena access, Glue/S3 access, and
# CloudWatch Logs access for the /aws-glue/ log groups.
AWSAthenaGlueRole:
  Type: AWS::IAM::Role
  Properties:
    # Only the Glue service is allowed to assume this role.
    AssumeRolePolicyDocument:
      Version: "2012-10-17"
      Statement:
        - Effect: Allow
          Principal:
            Service:
              - glue.amazonaws.com
          Action:
            - sts:AssumeRole
    Path: /
    Policies:
      - PolicyName: AWSAthenaAccess
        PolicyDocument:
          Statement:
            - Effect: Allow
              Action: "athena:*"
              Resource: "*"
      - PolicyName: GlueS3Access
        PolicyDocument:
          Statement:
            - Effect: Allow
              Action:
                - "glue:*"
                - iam:ListRolePolicies
                - iam:GetRole
                - iam:GetRolePolicy
              Resource: "*"
            # Full S3 access on every resource; this already covers the
            # narrower aws-glue-* statement that follows.
            - Effect: Allow
              Action:
                - "s3:*"
                - "s3-object-lambda:*"
              Resource: "*"
            - Effect: Allow
              Action:
                - s3:GetObject
                - s3:PutObject
              Resource:
                - "arn:aws:s3:::aws-glue-*/*"
                - "arn:aws:s3:::*/*aws-glue-*/*"
                - "arn:aws:s3:::aws-glue-*"
      # This entry must be its own list item. Without the leading "- " its
      # PolicyName/PolicyDocument keys duplicate those of GlueS3Access.
      - PolicyName: AWSCloudWatchLogsAccess
        PolicyDocument:
          Statement:
            - Effect: Allow
              Action:
                - logs:CreateLogGroup
                - logs:CreateLogStream
                - logs:GetLogEvents
                - logs:PutLogEvents
              Resource:
                # Log-group ARNs carry a "log-group:" segment before the name.
                - "arn:aws:logs:*:*:log-group:/aws-glue/*"
# S3 bucket for raw data: AES256 default encryption, all public access blocked.
RawDataBucket:
  Type: AWS::S3::Bucket
  Properties:
    # The stage comes from provider.stage; no custom.stage key is defined in
    # the custom section of the original file.
    BucketName: data-${self:custom.product}-${self:custom.bucket}-${self:provider.stage}
    BucketEncryption:
      ServerSideEncryptionConfiguration:
        - ServerSideEncryptionByDefault:
            SSEAlgorithm: AES256
          BucketKeyEnabled: false
    PublicAccessBlockConfiguration:
      BlockPublicAcls: true
      BlockPublicPolicy: true
      IgnorePublicAcls: true
      RestrictPublicBuckets: true
# S3 bucket for Athena query results: AES256 default encryption, all public
# access blocked, objects expired after 7 days.
AthenaQueryResultBucket:
  Type: AWS::S3::Bucket
  Properties:
    # The stage comes from provider.stage; no custom.stage key is defined in
    # the custom section of the original file.
    BucketName: query-result-${self:custom.product}-${self:custom.bucket}-${self:provider.stage}
    BucketEncryption:
      ServerSideEncryptionConfiguration:
        - ServerSideEncryptionByDefault:
            SSEAlgorithm: AES256
          BucketKeyEnabled: false
    PublicAccessBlockConfiguration:
      BlockPublicAcls: true
      BlockPublicPolicy: true
      IgnorePublicAcls: true
      RestrictPublicBuckets: true
    LifecycleConfiguration:
      Rules:
        - Id: auto-delete
          Status: Enabled
          ExpirationInDays: 7
# Glue database that holds the tables created by the crawler.
AWSGlueDatabase:
  Type: AWS::Glue::Database
  Properties:
    CatalogId: !Ref AWS::AccountId
    DatabaseInput:
      # The stage comes from provider.stage; no custom.stage key is defined in
      # the custom section of the original file.
      Name: database-${self:custom.product}-${self:custom.database}-${self:provider.stage}
      Description: database-${self:custom.product}-${self:custom.database}-${self:provider.stage}
# Glue crawler that scans the raw-data bucket on a schedule, runs under
# AWSAthenaGlueRole, and writes its tables into AWSGlueDatabase.
AWSGlueCrawler:
  Type: AWS::Glue::Crawler
  DependsOn:
    - AWSAthenaGlueRole
    - AWSGlueDatabase
  Properties:
    # The stage comes from provider.stage; no custom.stage key is defined in
    # the custom section of the original file.
    Name: crawler-${self:custom.product}-${self:custom.crawler}-${self:provider.stage}
    Description: crawler-${self:custom.product}-${self:custom.crawler}-${self:provider.stage}
    Role:
      Fn::GetAtt: [AWSAthenaGlueRole, Arn]
    Schedule:
      # At minute 0 of every 6th hour (starting at hour 0), Monday to Friday.
      ScheduleExpression: 'cron(0 0/6 ? * MON-FRI *)'
    DatabaseName: !Ref AWSGlueDatabase
    Targets:
      S3Targets:
        # Ref on a bucket yields only its name, so build the s3:// URI
        # explicitly for the crawl target.
        - Path:
            Fn::Join:
              - ""
              - - "s3://"
                - Ref: RawDataBucket
                - "/"
          # File types the crawler skips.
          Exclusions:
            - "**.wav"
            - "**.webm"
            - "**.zip"
            - "**.opus"
            - "**.txt"
    # Serverless resolves the variables before deployment, so no Fn::Sub is
    # needed around this plain string.
    TablePrefix: table-${self:custom.product}-${self:custom.crawler}-${self:provider.stage}
    SchemaChangePolicy:
      UpdateBehavior: UPDATE_IN_DATABASE
      DeleteBehavior: LOG
    # JSON crawler configuration: partitions inherit from the table, and new
    # columns are merged into existing tables.
    Configuration: '{"Version":1.0,"CrawlerOutput":{"Partitions":{"AddOrUpdateBehavior":"InheritFromTable"},"Tables":{"AddOrUpdateBehavior":"MergeNewColumns"}}}'
# CloudWatch log group for the Glue crawler.
# NOTE(review): the name is fixed and carries no stage suffix, so a second
# stage in the same account/region (or a pre-existing group with this name)
# would presumably conflict. Confirm before deploying more than one stage.
AWSGlueCrawlerLogGroup:
  Type: "AWS::Logs::LogGroup"
  Properties:
    LogGroupName: "/aws-glue/crawlers"
    # RetentionInDays: 1
# Athena workgroup for queries. Results go to AthenaQueryResultBucket with
# SSE-S3 encryption, and the workgroup settings are enforced.
AWSAthenaWorkGroup:
  Type: AWS::Athena::WorkGroup
  Properties:
    # The stage comes from provider.stage; no custom.stage key is defined in
    # the custom section of the original file.
    Name: workgroup-${self:custom.product}-${self:custom.workgroup}-${self:provider.stage}
    Description: workgroup-${self:custom.product}-${self:custom.workgroup}-${self:provider.stage}
    State: ENABLED
    RecursiveDeleteOption: true
    WorkGroupConfiguration:
      EnforceWorkGroupConfiguration: true
      PublishCloudWatchMetricsEnabled: true
      # RequesterPaysEnabled: false
      ResultConfiguration:
        # OutputLocation must be an s3:// URI. Ref on a bucket yields only the
        # bare bucket name, so build the URI explicitly.
        OutputLocation:
          Fn::Join:
            - ""
            - - "s3://"
              - Ref: AthenaQueryResultBucket
              - "/"
        EncryptionConfiguration:
          EncryptionOption: SSE_S3
# Sample Athena named query, saved in AWSAthenaWorkGroup, that selects from a
# crawler-created table in the Glue database.
SampleQueryDate:
  Type: AWS::Athena::NamedQuery
  DependsOn:
    - AWSAthenaWorkGroup
    - AWSGlueCrawler
    - AWSGlueDatabase
  Properties:
    # The stage comes from provider.stage; no custom.stage key is defined in
    # the custom section of the original file.
    Name: query-${self:custom.product}-${self:custom.query}-${self:provider.stage}
    Description: query-${self:custom.product}-${self:custom.query}-${self:provider.stage}
    Database: !Ref AWSGlueDatabase
    # The database and table prefix are spelled exactly as AWSGlueDatabase's
    # Name and AWSGlueCrawler's TablePrefix.
    # NOTE(review): the raw_data suffix and the startdate column assume the
    # crawler finds a raw_data folder with that column, and that Glue keeps
    # the prefix verbatim. Verify against the catalog after the first crawl.
    QueryString: >
      SELECT * FROM "database-${self:custom.product}-${self:custom.database}-${self:provider.stage}"."table-${self:custom.product}-${self:custom.crawler}-${self:provider.stage}raw_data"
      ORDER BY startdate DESC
    WorkGroup: !Ref AWSAthenaWorkGroup
# Values output and exported once the CloudFormation stack has completed.
# NOTE(review): the export names are fixed strings with no stage suffix, so
# they would presumably collide across stages. Confirm before deploying more
# than one stage to the same account and region.
Outputs:
  AWSAthenaGlueRole:
    Value:
      Ref: AWSAthenaGlueRole
    Export:
      Name: AWSAthenaGlueRole-Name
  AWSGlueDatabase:
    Value:
      Ref: AWSGlueDatabase
    Export:
      Name: AWSGlueDatabase-name
  AWSGlueCrawler:
    Value:
      Ref: AWSGlueCrawler
    Export:
      Name: AWSGlueCrawler-name
  SampleQueryDate:
    Value:
      Ref: SampleQueryDate
    Export:
      Name: SampleQueryDate-name