serverless.yml

# Welcome to Serverless!
#
# This file is the main config file for your service.
# It's very minimal at this point and uses default values.
# You can always add more config options for more control.
# We've included some commented out config examples here.
# Just uncomment any of them to get that config option.
#
# For full config options, check the docs:
#    docs.serverless.com
#
# Happy Coding!

service: ${self:custom.product}-${self:provider.stage}

#useDotenv: true

# app and org for use with dashboard.serverless.com
#app: your-app-name
#org: your-org-name

# You can pin your service to only deploy with a specific Serverless version
# Check out our docs for more details
frameworkVersion: '3'

# you can overwrite defaults here
provider:
  name: aws
  region: ${opt:region, 'us-east-1'}                        # change to your region as required
  stage: ${opt:stage, 'staging'}                            # development, stagging, testing, production
  profile: ${opt:aws-profile, 'ServerlessUser'}
  stackTags:
    Env: ${self:provider.stage}

# you can define your custom resources here
custom:
  product: serverless
  bucket: s3
  database: gluedb
  crawler: gluecrwlr
  workgroup: athenawg
  query: athenaqry

# Resources section defines metadata for the Resources.
# Create IAM Role assumed by the crawler. For demonstration, this role is given all related permissions.
resources:
  Resources:
    AWSAthenaGlueRole:
      Type: AWS::IAM::Role
      Properties:
        AssumeRolePolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: "Allow"
              Principal:
                Service:
                  - "glue.amazonaws.com"
              Action:
                - "sts:AssumeRole"
        Path: "/"
        Policies:
          - PolicyName: AWSAthenaAccess
            PolicyDocument:
              Statement:
                - Effect: Allow
                  Action: athena:*
                  Resource: '*'
          - PolicyName: GlueS3Access
            PolicyDocument:
              Statement:
                - Effect: Allow
                  Action:
                    - glue:*
                    - iam:ListRolePolicies
                    - iam:GetRole
                    - iam:GetRolePolicy
                  Resource: '*'
                - Effect: Allow
                  Action:
                    - s3:*
                    - s3-object-lambda:*
                  Resource: '*'
                - Effect: Allow
                  Action:
                    - s3:GetObject
                    - s3:PutObject
                  Resource:
                    - arn:aws:s3:::aws-glue-*/*
                    - arn:aws:s3:::*/*aws-glue-*/*
                    - arn:aws:s3:::aws-glue-*
            PolicyName: AWSCloudWatchLogsAccess
            PolicyDocument:
              Statement:
                - Effect: Allow
                  Action:
                    - logs:CreateLogGroup
                    - logs:CreateLogStream
                    - logs:GetLogEvents
                    - logs:PutlogEvents
                  Resource:
                    - arn:aws:logs:*:*:/aws-glue/*

# Create a S3 bucket for raw data.
    RawDataBucket:
      Type: AWS::S3::Bucket
      Properties:
        BucketName: data-${self:custom.product}-${self:custom.bucket}-${self:custom.stage}
        BucketEncryption:
            ServerSideEncryptionConfiguration:
              -
                ServerSideEncryptionByDefault:
                    SSEAlgorithm: 'AES256'
                BucketKeyEnabled: false
        PublicAccessBlockConfiguration:
          BlockPublicAcls: true
          BlockPublicPolicy: true
          IgnorePublicAcls: true
          RestrictPublicBuckets: true
 
# Create a S3 bucket for query data backup.
    AthenaQueryResultBucket:
      Type: AWS::S3::Bucket
      Properties:
        BucketName: query-result-${self:custom.product}-${self:custom.bucket}-${self:custom.stage}
        BucketEncryption:
            ServerSideEncryptionConfiguration:
              -
                ServerSideEncryptionByDefault:
                    SSEAlgorithm: 'AES256'
                BucketKeyEnabled: false
        PublicAccessBlockConfiguration:
          BlockPublicAcls: true
          BlockPublicPolicy: true
          IgnorePublicAcls: true
          RestrictPublicBuckets: true
        LifecycleConfiguration:
          Rules:
            -
              Id: 'auto-delete'
              Status: 'Enabled'
              ExpirationInDays: 7

# Create a database to contain tables created by the crawler.
    AWSGlueDatabase:
      Type: AWS::Glue::Database
      Properties:
        CatalogId: !Ref AWS::AccountId
        DatabaseInput:
          Name: database-${self:custom.product}-${self:custom.database}-${self:custom.stage}
          Description: database-${self:custom.product}-${self:custom.database}-${self:custom.stage}

# Create a crawler to crawl the data on a Raw Data S3 bucket.
    AWSGlueCrawler:
      DependsOn:
        - AWSAthenaGlueRole
        - AWSGlueDatabase
      Type: AWS::Glue::Crawler
      Properties:
        Name: crawler-${self:custom.product}-${self:custom.crawler}-${self:custom.stage}
        Description: crawler-${self:custom.product}-${self:custom.crawler}-${self:custom.stage}
        Role:
          Fn::GetAtt: [ AWSAthenaGlueRole, Arn ]
        Schedule:
          # Run crawler every day every 6 hours Monday to Friday cron(0 0/6 ? * MON-FRI *)
          ScheduleExpression: 'cron(0 0/6 ? * MON-FRI *)'
        DatabaseName: !Ref AWSGlueDatabase
        Targets:
          S3Targets:
            - Path: !Ref RawDataBucket                # S3 Raw Bucket
              Exclusions:
                - "**.wav"
                - "**.webm"
                - "**.zip"
                - "**.opus"
                - "**.txt"
        TablePrefix: !Sub table-${self:custom.product}-${self:custom.crawler}-${self:provider.stage}
        SchemaChangePolicy:
          UpdateBehavior: "UPDATE_IN_DATABASE"
          DeleteBehavior: "LOG"
        Configuration: "{\"Version\":1.0,\"CrawlerOutput\":{\"Partitions\":{\"AddOrUpdateBehavior\":\"InheritFromTable\"},\"Tables\":{\"AddOrUpdateBehavior\":\"MergeNewColumns\"}}}"

# Create log group for glue crawler.
    AWSGlueCrawlerLogGroup:
      Type: AWS::Logs::LogGroup
      Properties:
        LogGroupName: /aws-glue/crawlers
#        RetentionInDays: 1

# Create athena workgroup for query.
    AWSAthenaWorkGroup:
      Type: AWS::Athena::WorkGroup
      Properties:
        Name: workgroup-${self:custom.product}-${self:custom.workgroup}-${self:custom.stage}
        Description: workgroup-${self:custom.product}-${self:custom.workgroup}-${self:custom.stage}
        State: ENABLED
        RecursiveDeleteOption: true
        WorkGroupConfiguration:
          EnforceWorkGroupConfiguration: true
          PublishCloudWatchMetricsEnabled: true
#         RequesterPaysEnabled: false
          ResultConfiguration:
            OutputLocation: !Ref AthenaQueryResultBucket     # S3 Bucket query output directory path
            EncryptionConfiguration:
              EncryptionOption: 'SSE_S3'

# Create a sample query in Athena where in query we are providing glue database name and glue crawler table name with S3 bucket data directory name.
    SampleQueryDate:
      DependsOn:
        - AWSAthenaWorkGroup
        - AWSGlueCrawler
        - AWSGlueDatabase
      Type: AWS::Athena::NamedQuery
      Properties:
        Name: query-${self:custom.product}-${self:custom.query}-${self:custom.stage}
        Description: query-${self:custom.product}-${self:custom.query}-${self:custom.stage}
        Database: !Ref AWSGlueDatabase
        QueryString: >
                      SELECT * FROM "database-${self:provider.stage}"."table-${self:provider.stage}raw_data"
                      ORDER BY startdate DESC
        WorkGroup: !Ref AWSAthenaWorkGroup

# Show out put results after completon of Cloud Formation Stack.
  Outputs:
    AWSAthenaGlueRole:
      Value: !Ref AWSAthenaGlueRole
      Export:
        Name: !Sub "AWSAthenaGlueRole-Name"
    AWSGlueDatabase:
      Value: !Ref AWSGlueDatabase
      Export:
        Name: !Sub "AWSGlueDatabase-name"
    AWSGlueCrawler:
      Value: !Ref AWSGlueCrawler
      Export:
        Name: !Sub "AWSGlueCrawler-name"
    SampleQueryDate:
      Value: !Ref SampleQueryDate
      Export:
        Name: !Sub "SampleQueryDate-name"