# .github/workflows/ec2-pipeline-debug.yml

# Debug pipeline: launches a GPU runner on AWS EC2 with CML, then uses it to
# install tooling and attempt a Gradio deployment.
# NOTE(review): "Compelete" looks like a typo for "Complete"; left unchanged
# because the workflow name is a runtime string that badges or `workflow_run`
# triggers may reference - confirm before renaming.
name: CI-CD-Compelete-Deployment-Gradio-Debug
# `workflow_dispatch` is the only trigger configured here, so runs are started
# manually (UI / API / CLI) rather than by pushes or pull requests.
on: 
  workflow_dispatch:
jobs:

  # build-and-push-ecr-image:
  #   name: Build and push ECR image
  #   runs-on: ubuntu-latest
  #   outputs:
  #     commit_id: ${{ steps.get_commit_id.outputs.commit_id }}
  #   steps:
  #     - name: Checkout Code
  #       uses: actions/checkout@v3

  #     - name: Install Utilities
  #       run: |
  #         sudo apt-get update
  #         sudo apt-get install -y jq unzip
  #     - name: Configure AWS credentials
  #       uses: aws-actions/configure-aws-credentials@v1
  #       with:
  #         aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
  #         aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  #         aws-region: ${{ secrets.AWS_REGION }}

  #     - name: Login to Amazon ECR
  #       id: login-ecr
  #       uses: aws-actions/amazon-ecr-login@v1

  #     - name: Get latest commit ID
  #       id: get_commit_id
  #       run: |
  #         latest_commit=$(git rev-parse HEAD)
  #         echo "::set-output name=commit_id::$latest_commit"


  #     - name: Display the commit ID
  #       run: |
  #         echo "Latest commit ID is: ${{ steps.get_commit_id.outputs.commit_id }}"


  #     - name: Build, tag, and push image to Amazon ECR
  #       id: build-image
  #       env:
  #         ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
  #         ECR_REPOSITORY: ${{ secrets.ECR_REPOSITORY_NAME }}
  #         IMAGE_TAG: latest
  #       run: |
  #         # Build a docker container and
  #         # push it to ECR so that it can
  #         # be deployed to ECS.
  #         docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG .
  #         docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
  #         echo "::set-output name=image::$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG"

  # Provisions the self-hosted GPU runner that `train-and-deploy` runs on.
  # Uses CML to request one spot EC2 instance labelled `cml-gpu`, and exposes
  # the checked-out commit SHA as the job output `commit_id`.
  launch-runner:
    runs-on: ubuntu-latest
    # needs: build-and-push-ecr-image
    outputs:
      commit_id: ${{ steps.get_commit_id_runner.outputs.commit_id }}
    steps:
      - uses: actions/checkout@v3
      - uses: iterative/setup-cml@v2

      # The commit id is resolved locally from the checkout. This job has no
      # `needs:` (the ECR build job is commented out), so reading it from the
      # `needs` context would always yield an empty string.
      - name: Get latest commit ID
        id: get_commit_id_runner
        run: |
          latest_commit=$(git rev-parse HEAD)
          # Environment file replaces the deprecated `::set-output` command.
          echo "commit_id=$latest_commit" >> "$GITHUB_OUTPUT"

      - name: Display the commit ID
        run: |
          echo "Latest commit ID is: ${{ steps.get_commit_id_runner.outputs.commit_id }}"

      - name: Deploy runner on AWS EC2
        env:
          # Token CML uses to register the runner with the repository.
          REPO_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        run: |
          cml runner launch \
          --cloud=aws \
          --name=session-10 \
          --cloud-region=ap-south-1 \
          --cloud-type=g4dn.xlarge \
          --cloud-hdd-size=64 \
          --cloud-spot \
          --single \
          --labels=cml-gpu \
          --idle-timeout=100 \
          --cloud-gpu=nvidia-tesla-t4

  # Runs on the CML-provisioned runner (label `cml-gpu`) inside a PyTorch CUDA
  # container. It prints host diagnostics, installs the AWS CLI, Docker and jq,
  # builds a uv-managed Python 3.12 virtual environment with Gradio under
  # `gradio_demo/`, logs in to Hugging Face and attempts `gradio deploy`.
  #
  # Each `run:` step starts a fresh shell in the workspace root, so `export`
  # and `cd` do not carry over between steps. Tool directories are therefore
  # appended to $GITHUB_PATH, and later steps set `working-directory`.
  train-and-deploy:
    runs-on: [cml-gpu]
    needs: launch-runner
    outputs:
      commit_id: ${{ steps.get_commit_id_ec2.outputs.commit_id }}
    timeout-minutes: 20
    container:
      image: docker://pytorch/pytorch:2.3.1-cuda11.8-cudnn8-runtime
      options: --gpus all
    # runs-on: ubuntu-latest
    steps:

      # Provides the step id that the job-level `commit_id` output reads from.
      # Falls back to the triggering SHA when the upstream output is empty.
      - name: Get latest commit ID
        id: get_commit_id_ec2
        env:
          COMMIT_ID: ${{ needs.launch-runner.outputs.commit_id || github.sha }}
        run: |
          echo "commit_id=$COMMIT_ID" >> "$GITHUB_OUTPUT"

      - name: Verify EC2 Instance
        env:
          # Set for the whole step: an inline `VAR=value cmd1 && cmd2` prefix
          # would only apply to cmd1, leaving upgrade/install interactive.
          DEBIAN_FRONTEND: noninteractive
          # Credentials are passed through the environment instead of being
          # interpolated into the script text.
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_DEFAULT_REGION: ap-south-1
        run: |
          apt update && apt upgrade -y && apt install -y curl wget git unzip zip tar lsb-release
          curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
          unzip awscliv2.zip
          ./aws/install -i ~/aws-cli -b ~/aws-cli/bin
          # Make the AWS CLI available to this step and to all later steps.
          echo "$HOME/aws-cli/bin" >> "$GITHUB_PATH"
          export PATH="$HOME/aws-cli/bin:$PATH"
          apt-get install -y groff
          aws --version

          echo "Checking instance information..."
          # Check if we're on EC2
          # TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
          # curl -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/instance-type

          echo "Checking system resources..."
          lscpu
          free -h
          df -h
          nvidia-smi  # This will show GPU if available

          echo "Checking environment..."
          lsb_release -a
          # Variable names only, so credential values never reach the log.
          env | cut -d= -f1 | grep AWS || true
          hostname
          whoami
          pwd
          # Install the AWS CLI if not already available
          # if ! command -v aws &> /dev/null; then
          #   apt-get update
          #   apt-get install -y awscli
          # fi

          curl -fsSL https://get.docker.com -o get-docker.sh
          sh get-docker.sh
          docker --version

          # ECR login (disabled); credentials and region come from `env:` above.
          # aws sts get-caller-identity
          # aws ecr get-login-password --region ap-south-1 | docker login --username AWS --password-stdin 306093656765.dkr.ecr.ap-south-1.amazonaws.com

      - name: Install jq
        run: |
          apt-get update
          apt-get install -y jq

      # Deployment on Huggingface
      - name: Install Gradio
        run: |
          # -p keeps the step re-runnable if the directory already exists.
          mkdir -p gradio_demo
          cd gradio_demo/
          touch dummy.txt
          apt update
          apt install python3-pip -y
          apt install wget -y
          wget -qO- https://astral.sh/uv/install.sh | sh

          # Persist the uv install locations for the following steps, and
          # extend PATH for the remainder of this one.
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
          echo "/root/.local/bin" >> "$GITHUB_PATH"
          export PATH="$PATH:/root/.local/bin:$HOME/.local/bin"

          # Setup failures stop the step here: the login and deploy steps
          # depend on this environment, so continuing would only fail later.
          uv python install 3.12
          # Same version spec as the install above, so that interpreter is reused.
          uv venv --python 3.12
          . .venv/bin/activate || true
          python --version || true
          python3 --version || true

          cat > requirements.txt <<'EOF'
          --extra-index-url https://download.pytorch.org/whl/cpu
          gradio
          torch==2.4.1+cpu
          torchvision==0.19.1+cpu
          pillow==11.0.0
          huggingface_hub==0.26.2
          EOF
          # Installs into ./.venv, the environment `uv run` picks up when the
          # later steps run from this directory.
          uv pip install -r requirements.txt
          # python3 -m pip install -r requirements.txt

      - name: Log in to Hugging Face
        working-directory: gradio_demo
        env:
          # Read from the environment so the token is not embedded in the
          # Python source passed on the command line.
          HF_LOGIN_TOKEN: ${{ secrets.hf_token }}
        run: uv run python -c 'import os, huggingface_hub; huggingface_hub.login(token=os.environ["HF_LOGIN_TOKEN"])'

      - name: Deploy to Spaces
        working-directory: gradio_demo
        run: |
          # Best-effort: a failed deploy does not fail the debug job.
          uv run gradio deploy || true

        # https://cml.dev/doc/self-hosted-runners#gitlab-ciyml
        # --cloud-gpu={nogpu,k80,v100,tesla}
        # TODO: aws cli configuration "aws sts get-caller-identity" in docker