From fe1d43877c38e8d7486d8ca0d4c645fa25410a30 Mon Sep 17 00:00:00 2001
From: Kent Huang
Date: Thu, 16 May 2024 17:38:48 +0800
Subject: [PATCH 1/2] [Feature] DRC-443 Setup dev-container to integrate with GitHub Codespace

- Provide a Docker image for dev-container
- Setup google-cloud auth inside the GitHub Codespace
- Parse `recce-state` and daily recce staging from existing workflow
- Execute `dbt build` and `dbt docs generate` during the launch time
- Execute `recce server` in the end

Signed-off-by: Kent Huang
---
 .devcontainer/Dockerfile              | 35 +++++++++++++++
 .devcontainer/devcontainer.json       | 22 ++++++++++
 .devcontainer/docker-build.sh         |  3 ++
 .devcontainer/github_codespace_env.sh | 30 +++++++++++++
 .devcontainer/setup_required_env.sh   | 61 +++++++++++++++++++++++++++
 .gitignore                            |  7 ++-
 .vscode/tasks.json                    | 40 ++++++++++++++++++
 7 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 .devcontainer/Dockerfile
 create mode 100644 .devcontainer/devcontainer.json
 create mode 100644 .devcontainer/docker-build.sh
 create mode 100644 .devcontainer/github_codespace_env.sh
 create mode 100644 .devcontainer/setup_required_env.sh
 create mode 100644 .vscode/tasks.json

diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 000000000..f3698c063
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,35 @@
+FROM mcr.microsoft.com/vscode/devcontainers/python:3.11
+
+# Add the GitHub CLI apt repository and its signing key
+RUN (type -p wget >/dev/null || (sudo apt-get update && sudo apt-get install wget -y)) \
+&& sudo mkdir -p -m 755 /etc/apt/keyrings \
+&& wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
+&& sudo chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
+&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null
+
+RUN apt-get update && apt-get install -y python3-dev gh && rm -rf /var/lib/apt/lists/*
+
+ARG USER_UID=1000
+ARG USER_GID=$USER_UID
+
+RUN if [ "$USER_GID" != "1000" ] || [ "$USER_UID" != "1000" ]; then groupmod --gid $USER_GID vscode && usermod --uid $USER_UID --gid $USER_GID vscode; fi
+
+RUN pip3 install --no-cache-dir --upgrade pip
+RUN pip3 install --no-cache-dir pipx
+RUN pipx install poetry
+
+# Download and install the gcloud SDK in a single layer so the tarball
+# (removed below) is not baked into the image; -f makes curl fail on HTTP errors.
+RUN mkdir -p /usr/local/gcloud \
+    && curl -fsSL https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz -o /tmp/google-cloud-sdk.tar.gz \
+    && tar -C /usr/local/gcloud -xvf /tmp/google-cloud-sdk.tar.gz \
+    && rm /tmp/google-cloud-sdk.tar.gz \
+    && /usr/local/gcloud/google-cloud-sdk/install.sh
+
+# Add the gcloud SDK binaries to PATH
+ENV PATH="$PATH:/usr/local/gcloud/google-cloud-sdk/bin"
+
+# Install OSO's Python dependencies
+COPY . /tmp/oso
+RUN cd /tmp/oso && pip3 install --no-cache-dir .
+RUN pip3 install --no-cache-dir recce-nightly
\ No newline at end of file
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 000000000..484d5f99d
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,22 @@
+{
+    "name": "Recce CodeSpace",
+    "image": "infuseai/oso-dev-container:3.11",
+    "containerEnv": {
+        "DBT_GOOGLE_PROJECT": "infuseai-dev",
+        "DBT_GOOGLE_DATASET": "oso_playground",
+        "DBT_GOOGLE_DEV_DATASET": "oso_playground_dev",
+        "DBT_GOOGLE_KEYFILE": "/home/vscode/.config/gcloud/google-service-account.json",
+        "RECCE_CI_WORKFLOW_NAME": "OSO Recce CI",
+        "RECCE_DAILY_CI_WORKFLOW_NAME": "OSO Recce Staging CI"
+    },
+    "customizations": {
+        "vscode": {
+            "settings": {
+                "terminal.integrated.shell.linux": "/bin/bash",
+                "python.terminal.activateEnvironment": true
+            },
+            "extensions": []
+        }
+    },
+    "forwardPorts": [8000]
+}
\ No newline at end of file
diff --git a/.devcontainer/docker-build.sh b/.devcontainer/docker-build.sh
new file mode 100644
index 000000000..cb7beff81
--- /dev/null
+++ b/.devcontainer/docker-build.sh
@@ -0,0 +1,3 @@
+#! /bin/bash
+
+docker buildx build --platform linux/amd64 -t infuseai/oso-dev-container:3.11 .. -f Dockerfile
\ No newline at end of file
diff --git a/.devcontainer/github_codespace_env.sh b/.devcontainer/github_codespace_env.sh
new file mode 100644
index 000000000..540f52feb
--- /dev/null
+++ b/.devcontainer/github_codespace_env.sh
@@ -0,0 +1,30 @@
+#! /bin/bash
+
+if [ "${CODESPACES}" == "true" ]; then
+    # Set the default git repository if running in GitHub Codespaces
+    echo "Setting the default git repository to $GITHUB_REPOSITORY"
+    gh repo set-default "$GITHUB_REPOSITORY"
+
+    current_branch=$(git branch --show-current)
+    # Check if the current branch is under a pull request
+    if gh pr view > /dev/null ; then
+        # Look up the latest successful Recce CI run for this branch
+        run_id=$(gh run list -b "${current_branch}" -s success --limit 1 -w "${RECCE_CI_WORKFLOW_NAME}" --json databaseId | jq '.[].databaseId')
+        if [ -z "$run_id" ]; then
+            echo "No successful Recce run found for the current branch."
+        else
+            echo "Downloading the Recce state file for the last successful run."
+            gh run download "$run_id" --dir .recce
+            echo "The Recce state file is downloaded to '.recce/recce_state_file/recce_state.json'."
+        fi
+    fi
+
+    # Download the daily staging artifacts (skipped when no successful run exists)
+    default_branch=$(gh repo view --json defaultBranchRef --jq .defaultBranchRef.name)
+    daily_artifact_workflow_id=$(gh run list -w "${RECCE_DAILY_CI_WORKFLOW_NAME}" --status success -b dev --limit 1 --json databaseId | jq '.[].databaseId')
+    [ -n "$daily_artifact_workflow_id" ] && gh run download "$daily_artifact_workflow_id" --dir .recce
+    if [ -d ".recce/dbt-artifacts" ]; then
+        mv .recce/dbt-artifacts target-base
+        echo "The daily staging artifact files are downloaded to 'target-base'."
+    fi
+fi
\ No newline at end of file
diff --git a/.devcontainer/setup_required_env.sh b/.devcontainer/setup_required_env.sh
new file mode 100644
index 000000000..e09fde804
--- /dev/null
+++ b/.devcontainer/setup_required_env.sh
@@ -0,0 +1,61 @@
+#! /bin/bash
+
+GREEN="\033[0;32m"
+YELLOW="\033[1;33m"
+ENDCOLOR="\033[0m"
+
+function show_env_hint() {
+    echo -e "[${YELLOW}Required ENV${ENDCOLOR}] $1 is not set."
+    cat << EOF
+
+Please set the following environment variables in your GitHub Codespaces Secrets.
+You can set the secret in your GitHub Codespaces Secrets by going to:
+    GitHub Personal Account -> Settings -> Codespaces -> Codespaces secrets
+
+Then add the secret with the name $1 and the value of the secret.
+After adding the secret, please restart the Codespaces to apply the changes.
+EOF
+}
+
+# Check that the required ENV variables are provided by the user
+if [ -z "$DBT_GOOGLE_PROJECT" ]; then
+    show_env_hint "DBT_GOOGLE_PROJECT"
+    exit 1
+fi
+
+if [ -z "$DBT_GOOGLE_DATASET" ]; then
+    show_env_hint "DBT_GOOGLE_DATASET"
+    exit 1
+fi
+
+if [ -z "$DBT_GOOGLE_DEV_DATASET" ]; then
+    show_env_hint "DBT_GOOGLE_DEV_DATASET"
+    exit 1
+fi
+
+mkdir -p "$HOME/.config/gcloud" "$HOME/.dbt"
+DBT_GOOGLE_KEYFILE="$HOME/.config/gcloud/google-service-account.json"
+
+# Setup dbt profiles.yml
+if [ "$DBT_PROFILES_YML_CONTENT" != '' ]; then
+    echo "$DBT_PROFILES_YML_CONTENT" > "$HOME/.dbt/profiles.yml"
+    echo "dbt profiles.yml is saved to $HOME/.dbt/profiles.yml"
+fi
+
+
+# Check if the user is already logged in
+if [ -z "$GOOGLE_CLOUD_SERVICE_ACCOUNT_KEY_CONTENT" ]; then
+    # Change to use OAuth2 to login
+    if [ -f "${DBT_GOOGLE_KEYFILE}" ]; then
+        echo "User is already logged in Google cloud"
+        gcloud auth list
+        exit 0
+    else
+        echo -e "[${GREEN}Action${ENDCOLOR}] Please login to Google cloud to continue."
+        gcloud auth application-default login
+    fi
+else
+    # Use the service account key to login; umask 077 keeps a newly created key file owner-only
+    (umask 077 && echo "$GOOGLE_CLOUD_SERVICE_ACCOUNT_KEY_CONTENT" > "${DBT_GOOGLE_KEYFILE}")
+    echo "Google cloud service account key is saved to ${DBT_GOOGLE_KEYFILE}"
+fi
diff --git a/.gitignore b/.gitignore
index e78628001..01512cbde 100644
--- a/.gitignore
+++ b/.gitignore
@@ -63,4 +63,9 @@ dbt_packages/
 
 supabase/.temp/
 **/supabase/.temp/
-*/**/supabase/.temp/
\ No newline at end of file
+*/**/supabase/.temp/
+
+# Recce & DBT
+.recce/
+target/
+target-base/
\ No newline at end of file
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
new file mode 100644
index 000000000..715dda479
--- /dev/null
+++ b/.vscode/tasks.json
@@ -0,0 +1,40 @@
+{
+    // See https://go.microsoft.com/fwlink/?LinkId=733558
+    // for the documentation about the tasks.json format
+    "version": "2.0.0",
+    "tasks": [
+        {
+            "label": "Check GitHub CodeSpace environment",
+            "type": "shell",
+            "command": "bash .devcontainer/github_codespace_env.sh",
+        },
+        {
+            "label": "Install Project dependencies",
+            "type": "shell",
+            "command": "poetry install && poetry env use python",
+        },
+        {
+            "label": "Setup required environment variables",
+            "type": "shell",
+            "command": "bash .devcontainer/setup_required_env.sh",
+        },
+        {
+            "label": "Run DBT",
+            "type": "shell",
+            "command": "poetry run dbt deps && poetry run dbt build && poetry run dbt docs generate",
+            "dependsOn": [
+                "Login Google Cloud for BigQuery",
+                "Check GitHub CodeSpace environment"
+            ],
+        },
+        {
+            "label": "Launch Recce server",
+            "type": "shell",
+            "command": "if [ -f '.recce/recce_state_file/recce_state.json' ]; then recce server --review .recce/recce_state_file/recce_state.json; else recce server; fi",
+            "dependsOn": ["Run DBT"],
+            "runOptions": {
+                "runOn": "folderOpen"
+            }
+        }
+    ]
+}
\ No newline at end of file

From 9b75cd3b666c47e346434aa63edf12a9c133b492 Mon Sep 17 00:00:00 2001
From: Kent Huang
Date: Tue, 21 May 2024 16:12:01 +0800
Subject: [PATCH 2/2] [Feature] Execute recce by bash script

Signed-off-by: Kent Huang
---
 .devcontainer/launch_recce_server.sh | 12 ++++++++++++
 .gitignore                           |  3 ++-
 .vscode/tasks.json                   |  6 +++---
 3 files changed, 17 insertions(+), 4 deletions(-)
 create mode 100644 .devcontainer/launch_recce_server.sh

diff --git a/.devcontainer/launch_recce_server.sh b/.devcontainer/launch_recce_server.sh
new file mode 100644
index 000000000..c85c4dbfb
--- /dev/null
+++ b/.devcontainer/launch_recce_server.sh
@@ -0,0 +1,12 @@
+#! /bin/bash
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+if [ -f "${DIR}/../.recce/recce_state_file/recce_state.json" ]; then
+    echo "Launching the Recce server in review mode. The Recce state file is found."
+    cp "${DIR}/../.recce/recce_state_file/recce_state.json" recce_state.json
+    recce server --review recce_state.json
+else
+    echo "Launching the Recce server."
+    recce server
+fi
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 01512cbde..ab5471596 100644
--- a/.gitignore
+++ b/.gitignore
@@ -68,4 +68,5 @@ supabase/.temp/
 # Recce & DBT
 .recce/
 target/
-target-base/
\ No newline at end of file
+target-base/
+recce_state.json
\ No newline at end of file
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
index 715dda479..d9494ac15 100644
--- a/.vscode/tasks.json
+++ b/.vscode/tasks.json
@@ -21,16 +21,16 @@
         {
             "label": "Run DBT",
             "type": "shell",
-            "command": "poetry run dbt deps && poetry run dbt build && poetry run dbt docs generate",
+            "command": "dbt deps && dbt build && dbt docs generate",
             "dependsOn": [
-                "Login Google Cloud for BigQuery",
+                "Setup required environment variables",
                 "Check GitHub CodeSpace environment"
             ],
         },
         {
             "label": "Launch Recce server",
             "type": "shell",
-            "command": "if [ -f '.recce/recce_state_file/recce_state.json' ]; then recce server --review .recce/recce_state_file/recce_state.json; else recce server; fi",
+            "command": "bash .devcontainer/launch_recce_server.sh",
             "dependsOn": ["Run DBT"],
             "runOptions": {
                 "runOn": "folderOpen"