Skip to content

Commit

Permalink
Merge pull request #20 from DataRecce/feature/drc-444-implement-the-g…
Browse files Browse the repository at this point in the history
…ithub-pr-syncer

[Feature] implement the GitHub pr syncer
  • Loading branch information
popcornylu authored May 23, 2024
2 parents 247461e + 1a131dc commit 3f6b2fe
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 1 deletion.
40 changes: 40 additions & 0 deletions .github/workflows/recce-prsync.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# This workflow uses the GitHub PR Syncer (https://github.com/DataRecce/github-pr-syncer/)
# to sync the PRs from the upstream repository.
name: OSO Recce PR Sync

on:
  # Allow the sync to be started manually.
  workflow_dispatch:
  schedule:
    - cron: '30 17 * * *' # 17:30 UTC, i.e. 1:30 AM (UTC+8) every day

jobs:
  prsync:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install git+https://github.com/DataRecce/github-pr-syncer.git

      # Commits created by the syncer are attributed to this bot identity.
      - name: Set up Git
        run: |
          git config --global user.name "prsync[bot]"
          git config --global user.email "prsync[bot]@users.noreply.github.com"

      - name: Run prsync
        env:
          # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # We need the "workflows" permission, which the job's own GitHub
          # token cannot provide. Use a personal access token instead.
          GITHUB_TOKEN: ${{ secrets.GH_TOKEN_FOR_ADVANCED_PERMISSIONS }}
        run: |
          prsync 'DataRecce/oso'
114 changes: 114 additions & 0 deletions .githubprsyncer/.github/workflows/recce-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
# Recce CI for pull requests against main/dev that touch warehouse/dbt/**.
# It builds the PR's dbt models with the "dev" target, compares them with the
# base artifacts of the latest successful "OSO Recce Staging CI" run, uploads
# the Recce state file and posts the Recce summary as a PR comment.
name: OSO Recce CI

on:
  pull_request:
    branches: [main, dev]
    paths:
      - 'warehouse/dbt/**'

# Token scopes required by the steps below.
permissions:
  contents: read # actions/checkout
  actions: read # gh run list / gh run download
  pull-requests: write # posting the summary comment

env:
  # dbt env variables used in your dbt profiles.yml
  DBT_PROFILES_DIR: ./
  DBT_GOOGLE_PROJECT: ${{ vars.DBT_GOOGLE_PROJECT }}
  DBT_GOOGLE_DATASET: ${{ vars.DBT_GOOGLE_DATASET }}
  DBT_GOOGLE_KEYFILE: /tmp/google/google-service-account.json
  KEYFILE_CONTENTS: ${{ secrets.KEYFILE_CONTENTS }}

jobs:
  check-pull-request:
    name: Check pull request by Recce CI
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12.x"

      - name: Install dependencies
        run: |
          pipx install poetry==1.7.1
          poetry install
          poetry run which dbt

      - name: Install Recce
        run: poetry run pip install recce-nightly

      - name: Add packages.yml file
        env:
          # Passed through the environment rather than spliced into the
          # script, so quotes inside the value cannot break the shell command.
          PACKAGES_YAML: ${{ vars.PACKAGES_YAML }}
        run: |
          printf '%s\n' "$PACKAGES_YAML" > packages.yml

      - name: Prep Google keyfile
        run: |
          mkdir -p "$(dirname "$DBT_GOOGLE_KEYFILE")"
          printf '%s\n' "$KEYFILE_CONTENTS" > "$DBT_GOOGLE_KEYFILE"

      - name: Prepare dbt Base environment
        run: |
          run_id=$(gh run list --workflow "OSO Recce Staging CI" --repo DataRecce/oso --status success --limit 1 --json databaseId --jq '.[0].databaseId')
          # Fail with a clear message when there is no successful base run yet.
          if [[ -z "$run_id" || "$run_id" == "null" ]]; then
            echo "::error::No successful 'OSO Recce Staging CI' run found to use as the dbt base environment."
            exit 1
          fi
          gh run download "$run_id" --repo DataRecce/oso
          mv dbt-artifacts target-base
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # Each PR gets its own dataset name, exported for the following steps.
      - name: Set PR Schema
        run: echo "DBT_GOOGLE_DEV_DATASET=OSO_PR_${{ github.event.pull_request.number }}" >> "$GITHUB_ENV"

      - name: Prepare dbt Current environment
        run: |
          source "$(poetry env info --path)/bin/activate"
          dbt deps
          dbt build --target "$DBT_CURRENT_TARGET"
          dbt docs generate --target "$DBT_CURRENT_TARGET"
        env:
          DBT_CURRENT_TARGET: "dev"

      - name: Run Recce CI
        run: poetry run recce run

      - name: Archive Recce State File
        uses: actions/upload-artifact@v4
        id: recce-artifact-uploader
        with:
          name: recce-state-file
          path: recce_state.json

      - name: Prepare Recce Summary
        id: recce-summary
        run: |
          source "$(poetry env info --path)/bin/activate"
          recce summary recce_state.json > recce_summary.md
          cat recce_summary.md >> "$GITHUB_STEP_SUMMARY"
          printf '%s\n' "$NEXT_STEP_MESSAGE" >> recce_summary.md
          # Handle the case when the recce summary is too long to be displayed in the GitHub PR comment
          if [[ "$(wc -c < recce_summary.md)" -ge 65535 ]]; then
            {
              echo '# Recce Summary'
              echo 'The recce summary is too long to be displayed in the GitHub PR comment.'
              echo "Please check the summary detail in the [Job Summary](${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}) page."
              printf '%s\n' "$NEXT_STEP_MESSAGE"
            } > recce_summary.md
          fi
        env:
          ARTIFACT_URL: ${{ steps.recce-artifact-uploader.outputs.artifact-url }}
          NEXT_STEP_MESSAGE: |
            ## Next Steps
            If you want to check more detailed information about the recce result, please download the [artifact](${{ steps.recce-artifact-uploader.outputs.artifact-url }}) file and open it by [Recce](https://pypi.org/project/recce/) CLI.
            ### How to check the recce result
            ```bash
            # Unzip the downloaded artifact file
            tar -xf recce-state-file.zip
            # Launch the recce server based on the state file
            recce server --review recce_state.json
            # Open the recce server http://localhost:8000 by your browser
            ```

      - name: Comment on pull request
        uses: thollander/actions-comment-pull-request@v2
        with:
          filePath: recce_summary.md
26 changes: 26 additions & 0 deletions .githubprsyncer/profiles.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# dbt profile for the `opensource_observer` project. Both outputs connect to
# BigQuery with a service-account key file and read their settings from
# environment variables; they differ only in the dataset they write to.
opensource_observer:
  # By default we target the playground. It's less costly and also safer to
  # write there while developing.
  target: playground

  outputs:
    playground:
      type: bigquery
      method: service-account
      keyfile: "{{ env_var('DBT_GOOGLE_KEYFILE') }}"
      project: "{{ env_var('DBT_GOOGLE_PROJECT') }}"
      dataset: "{{ env_var('DBT_GOOGLE_DATASET') }}"
      location: US
      threads: 32
      job_retries: 1
      # NOTE(review): dbt-bigquery documents this setting as
      # `job_execution_timeout_seconds` - confirm this key is actually honored.
      job_execution_time_seconds: 300

    dev:
      type: bigquery
      method: service-account
      keyfile: "{{ env_var('DBT_GOOGLE_KEYFILE') }}"
      project: "{{ env_var('DBT_GOOGLE_PROJECT') }}"
      # Unlike playground, the dev dataset comes from DBT_GOOGLE_DEV_DATASET.
      dataset: "{{ env_var('DBT_GOOGLE_DEV_DATASET') }}"
      location: US
      threads: 32
      job_retries: 1
      # NOTE(review): same key-name question as in the playground output.
      job_execution_time_seconds: 300

4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,6 @@ supabase/.temp/
.recce/
target/
target-base/
recce_state.json
recce_state.json
venv/
.envrc

0 comments on commit 3f6b2fe

Please sign in to comment.