Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ RUN apt-get clean \
ENV PYTHONIOENCODING=utf-8
ENV LANG=C.UTF-8

# Update python
RUN python -m pip install --upgrade pip setuptools wheel --no-cache-dir
# Update python packages
RUN python -m pip install --upgrade pip setuptools wheel yq pytz pandas colorama --no-cache-dir

##
# dbt-bigquery with all packages
Expand Down
88 changes: 83 additions & 5 deletions cron_dbt_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
#

set -o errexit
set -o pipefail
set -o nounset

catch() {
echo 'catching!'
if [ "$1" != "0" ]; then
Expand All @@ -24,14 +27,89 @@ while [ $# -gt 0 ]; do
shift
done

echo 'Start realoading DBT Workload'
echo 'Start reloading DBT Workload'
echo 'Checking dependencies'
echo 'Update DBT-Project Repo files'
echo "Working on dbt-project directory ${DBT_REPO_NAME}"
if cd /data/dbt/"${DBT_REPO_NAME}"; then git config pull.rebase true; git reset --hard; git pull; else git clone "${GITLINK_SECRET}" /data/dbt/; fi
# Location of the dbt project checkout. DBT_REPO_NAME is not assigned in the
# visible part of this script — presumably set by the argument parsing above
# or by the environment; with nounset enabled an unset value aborts here.
DBT_DIR="/data/dbt/${DBT_REPO_NAME}"
# The cd doubles as an existence check: a failing command in an `if` condition
# does not trigger errexit, so a missing directory falls through to the clone.
if cd "${DBT_DIR}"; then
# Existing checkout: discard local modifications, then pull using rebase.
git config pull.rebase true
git reset --hard
git pull
else
# NOTE(review): the clone target is /data/dbt/ rather than ${DBT_DIR}; the cd
# below only succeeds if the cloned tree contains a ${DBT_REPO_NAME} directory
# — confirm this matches the repository layout.
git clone "${GITLINK_SECRET}" /data/dbt/
cd "${DBT_DIR}"
fi

#######################################
# Drop the re-data package from packages.yml when DATA_QUALITY is disabled.
#
# Reads the last `DATA_QUALITY:` entry of the Airflow variables file. When its
# value is `false` or `False` and the packages file mentions `re-data/re_data`,
# that package entry is removed (with yq when it is installed, otherwise with
# an awk fallback) and ./target/manifest.json is deleted so dbt recompiles.
#
# Paths that are not absolute are resolved against the current directory, so
# the caller is expected to have changed into the dbt project directory first.
#
# Arguments:
#   $1 - Airflow variables file (default: dbt_airflow_variables.yml)
#   $2 - dbt packages file (default: packages.yml)
# Outputs:
#   Warning / success messages on stdout.
# Returns:
#   0 when nothing had to be done or the package was removed;
#   1 when rewriting the packages file failed (the original file is kept).
#######################################
check_and_modify_packages() {
  local airflow_vars_file="${1:-dbt_airflow_variables.yml}"
  local packages_file="${2:-packages.yml}"
  local data_quality
  local tmp_file

  # If a path is not absolute, treat it as relative to the dbt project dir.
  if [[ "${airflow_vars_file}" != /* ]]; then
    airflow_vars_file="./${airflow_vars_file}"
  fi
  if [[ "${packages_file}" != /* ]]; then
    packages_file="./${packages_file}"
  fi

  if [ ! -f "${airflow_vars_file}" ]; then
    echo "⚠️ [WARNING] Airflow variables file not found: ${airflow_vars_file}. Skipping DATA_QUALITY check."
    return 0
  fi

  if [ ! -f "${packages_file}" ]; then
    echo "⚠️ [WARNING] packages.yml not found: ${packages_file}. Skipping package modification."
    return 0
  fi

  # Parse DATA_QUALITY similarly to mr_e2e_workflow.yaml.
  # grep exits 1 when the key is absent; with errexit + pipefail enabled that
  # would abort the whole script, so a missing key is deliberately mapped to
  # an empty value instead. Quotes and a trailing CR are stripped.
  data_quality="$(
    { grep -E '^[[:space:]]*DATA_QUALITY:' "${airflow_vars_file}" || true; } \
      | tail -n1 \
      | awk '{print $2}' \
      | tr -d "'\"\r"
  )"

  # Only modify if the re-data package exists in packages.yml ...
  if ! grep -q 're-data/re_data' "${packages_file}"; then
    return 0
  fi
  # ... and data quality is explicitly switched off.
  if [[ "${data_quality}" != "false" && "${data_quality}" != "False" ]]; then
    return 0
  fi

  echo "⚠️ [WARNING] DATA_QUALITY is ${data_quality}. Removing re-data package from ${packages_file}"

  # Written next to the target so the final mv is a same-directory rename.
  tmp_file="${packages_file}.tmp"

  if command -v yq >/dev/null 2>&1; then
    # yq v4 syntax (same as mr_e2e_workflow.yaml)
    yq 'del(.packages[] | select(.package == "re-data/re_data"))' \
      "${packages_file}" > "${tmp_file}" \
      || { rm -f "${tmp_file}"; return 1; }
  else
    # Fallback removal (no yq): drop the "- package: re-data/re_data" list
    # entry together with its nested lines. The entry ends at the first
    # non-blank line indented no deeper than its dash, so following entries of
    # any kind (package, git, local) and top-level keys are preserved.
    awk '
      {
        # Column of the first non-blank character; 0 for blank lines.
        first = match($0, /[^[:space:]]/)
        if (skip) {
          # Blank or deeper-indented lines still belong to the removed entry.
          if (first == 0 || first > dash_col) {
            next
          }
          skip = 0
        }
        if ($0 ~ /^[[:space:]]*-[[:space:]]*package:[[:space:]]*re-data\/re_data[[:space:]]*$/) {
          skip = 1
          dash_col = first
          next
        }
        print
      }
    ' "${packages_file}" > "${tmp_file}" \
      || { rm -f "${tmp_file}"; return 1; }
  fi

  mv "${tmp_file}" "${packages_file}"

  # Force recompilation by removing manifest (if present)
  rm -f "./target/manifest.json"
  echo "✅ [SUCCESS] re-data package removed and manifest cleared"
}

# Always check DATA_QUALITY and adjust packages.yml before dbt deps
check_and_modify_packages "${AIRFLOW_SECRET_FILE_NAME:-dbt_airflow_variables.yml}" "packages.yml"

echo 'Update dbt packages'
/usr/local/bin/dbt deps --profiles-dir /data/dbt/"${DBT_REPO_NAME}"/ --project-dir /data/dbt/"${DBT_REPO_NAME}"/
echo 'Generate dbt docs'
/usr/local/bin/dbt docs generate --profiles-dir /data/dbt/"${DBT_REPO_NAME}"/ --project-dir /data/dbt/"${DBT_REPO_NAME}"/

trap 'catch $? $LINENO' EXIT
/usr/local/bin/dbt docs generate --profiles-dir /data/dbt/"${DBT_REPO_NAME}"/ --project-dir /data/dbt/"${DBT_REPO_NAME}"/