NOTE(review): this patch was reconstructed from a whitespace-flattened copy.
Indentation and blank context lines are best guesses; regenerate it with
`git diff` against the real tree before applying. The index hashes are stale.

Changes made to the "+" side while reconstructing:
  * Dockerfile: download the Google Cloud apt key to a file with `curl -fsSL`
    and convert it with `gpg --dearmor` (apt-key is deprecated; no shell pipe,
    so a failed download fails the build); use the https:// apt repository.
  * api-entrypoint.sh, cron_redata.sh: quote ${DBT_PROJECT_FILE} and abort
    with an ERROR log instead of logging success when the yq rewrite fails.
  * cron_redata.sh: log a WARN when dbt_project.yml does not exist.
  * build.sh: keep the trailing newline at end of file.

diff --git a/Dockerfile b/Dockerfile
index 22bdba8..ac4246c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,19 +3,19 @@
 # See README for operational details
 ##
 
-# Top level build args
 ARG build_for=linux/amd64
 
 ##
-# base image (abstract)
+# Base: system deps + Python + dbt + re_data (single stage for clarity and to ensure all tools in final image)
 ##
-FROM --platform=$build_for python:3.11.11-slim-bullseye as base
+FROM --platform=$build_for python:3.11.11-slim-bullseye AS base
 LABEL maintainer=support@fast.bi
 
-# System setup
+# System packages (jq, git, gcloud, cron, etc.)
 RUN apt-get update \
   && apt-get dist-upgrade -y \
   && apt-get install -y --no-install-recommends \
+    jq \
     git \
     ssh-client \
     software-properties-common \
@@ -26,58 +26,54 @@ RUN apt-get update \
     curl \
     apt-transport-https \
     gnupg \
-    cl-base64 \
-    cron
-RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && apt-get update -y && apt-get install google-cloud-cli -y
-RUN apt-get clean \
-  && rm -rf \
-    /var/lib/apt/lists/* \
-    /tmp/* \
-    /var/tmp/*
+    coreutils \
+    cron \
+  && curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg -o /tmp/cloud.google.asc \
+  && gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg /tmp/cloud.google.asc \
+  && echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" > /etc/apt/sources.list.d/google-cloud-sdk.list \
+  && apt-get update -y \
+  && apt-get install -y google-cloud-cli \
+  && apt-get clean \
+  && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 
-# Env vars
+# Python env
 ENV PYTHONIOENCODING=utf-8
 ENV LANG=C.UTF-8
+ENV PYTHONWARNINGS=ignore
+ENV RE_DATA_SEND_ANONYMOUS_USAGE_STATS=0
+ENV DBT_WARN_ERROR_OPTIONS='{"exclude": ["*"]}'
 
-# Update python
-# Pin setuptools < 81 to ensure pkg_resources is available for re_data 0.11.0
-RUN python -m pip install --upgrade pip "setuptools<81" wheel yq pytz pandas colorama --no-cache-dir
+# Pin setuptools < 81 for re_data 0.11.0 (pkg_resources)
+RUN python -m pip install --no-cache-dir --upgrade pip "setuptools<81" wheel
 
-# Set up work directory
-WORKDIR /usr/app/dbt/
+# dbt adapters + re_data + yq (one layer for better cache)
+RUN python -m pip install --no-cache-dir \
+    yq \
+    pytz \
+    pandas \
+    colorama \
+    re-data==0.11.0 \
+    dbt-bigquery==1.9.2 \
+    dbt-snowflake==1.9.4 \
+    dbt-redshift==1.9.5 \
+    dbt-fabric==1.9.6
 
-##
-# dbt packages layer - this will be cached
-##
-FROM base as dbt-packages
-# Ensure setuptools with pkg_resources is installed before re_data
-RUN python -m pip install --no-cache-dir "setuptools<81"
-RUN python -m pip install --no-cache-dir dbt-bigquery==1.9.2
-RUN python -m pip install --no-cache-dir dbt-snowflake==1.9.4
-RUN python -m pip install --no-cache-dir dbt-redshift==1.9.5
-RUN python -m pip install --no-cache-dir dbt-fabric==1.9.6
-RUN python -m pip install --no-cache-dir re-data==0.11.0
+# Symlinks for CLI
+RUN ln -sf /usr/local/bin/dbt /usr/bin/dbt \
+  && ln -sf /usr/local/bin/re_data /usr/bin/re_data
 
-# Create symlinks for commands
-RUN ln -s /usr/local/bin/dbt /usr/bin/dbt
-RUN ln -s /usr/local/bin/re_data /usr/bin/re_data
+# Verify jq (and other tools) are present so build fails if apt layer is cached wrong
+RUN command -v jq >/dev/null 2>&1 || (echo "FATAL: jq not found in image" && exit 1)
+
+WORKDIR /usr/app/dbt/
 
 ##
-# Final image with scripts - this layer will be rebuilt when scripts change
+# Final: add scripts only (rebuild when scripts change)
 ##
-FROM dbt-packages as dbt-bigquery-re-data
+FROM base AS final
 LABEL maintainer=support@fast.bi
 
-# Copy scripts at the end so only this layer is rebuilt when scripts change
-COPY ./api-entrypoint.sh /usr/app/dbt/
-COPY ./cron_redata.sh /usr/app/dbt/
-COPY ./backfill_redata.sh /usr/app/dbt/
-
-# Set permissions in a single layer
-RUN chmod 755 /usr/app/dbt/api-entrypoint.sh \
-  && chmod 755 /usr/app/dbt/cron_redata.sh \
-  && chmod 755 /usr/app/dbt/backfill_redata.sh
-
-ENV RE_DATA_SEND_ANONYMOUS_USAGE_STATS=0
+COPY ./api-entrypoint.sh ./cron_redata.sh ./backfill_redata.sh /usr/app/dbt/
+RUN chmod 755 /usr/app/dbt/api-entrypoint.sh /usr/app/dbt/cron_redata.sh /usr/app/dbt/backfill_redata.sh
 
-ENTRYPOINT ["/bin/bash", "-c", "/usr/app/dbt/api-entrypoint.sh" ]
\ No newline at end of file
+ENTRYPOINT ["/bin/bash", "-c", "/usr/app/dbt/api-entrypoint.sh"]
diff --git a/api-entrypoint.sh b/api-entrypoint.sh
index fa76936..c900cff 100644
--- a/api-entrypoint.sh
+++ b/api-entrypoint.sh
@@ -53,9 +53,6 @@ catch() {
 
 trap 'catch $? $LINENO' EXIT
 
-# Disable re_data anonymous usage / Segment calls
-export RE_DATA_SEND_ANONYMOUS_USAGE_STATS=0
-
 # Create required directories
 mkdir -p /data || {
     echo "Failed to create /data directory" >&2
@@ -119,29 +116,20 @@ cd "/data/dbt/${DBT_REPO_NAME}" || { log "Failed to change to dbt directory" "ER
 # Ensure dbt_project.yml has target-path for re_data compatibility
 DBT_PROJECT_FILE="dbt_project.yml"
 if [ -f "${DBT_PROJECT_FILE}" ]; then
-    if ! grep -q '^[[:space:]]*target-path:' "${DBT_PROJECT_FILE}"; then
-        log "Adding default target-path to ${DBT_PROJECT_FILE} for re_data compatibility"
-        python - << 'PY'
-from pathlib import Path
-
-try:
-    import yaml  # type: ignore
-except ImportError:
-    # Fallback: append a simple line if PyYAML is unavailable
-    path = Path("dbt_project.yml")
-    text = path.read_text()
-    if "target-path" not in text:
-        text = text.rstrip() + "\n\ntarget-path: target\n"
-        path.write_text(text)
-else:
-    path = Path("dbt_project.yml")
-    data = yaml.safe_load(path.read_text()) or {}
-    if "target-path" not in data:
-        data["target-path"] = "target"
-        path.write_text(yaml.safe_dump(data, sort_keys=False))
-PY
+    if command -v yq &> /dev/null; then
+        TARGET_PATH_VALUE=$(yq -r '."target-path" // empty' "${DBT_PROJECT_FILE}" 2>/dev/null || echo "")
+
+        if [ -z "${TARGET_PATH_VALUE}" ] || [ "${TARGET_PATH_VALUE}" = "null" ] || [ "${TARGET_PATH_VALUE}" = "empty" ]; then
+            log "Adding default target-path to ${DBT_PROJECT_FILE} for re_data compatibility"
+            yq -Y '."target-path" //= "target"' "${DBT_PROJECT_FILE}" > /tmp/dbt_tmp.yml && mv /tmp/dbt_tmp.yml "${DBT_PROJECT_FILE}" \
+                || { log "Failed to add target-path to ${DBT_PROJECT_FILE}" "ERROR"; exit 1; }
+            log "Successfully added target-path to ${DBT_PROJECT_FILE}"
+        else
+            log "target-path already present in ${DBT_PROJECT_FILE}"
+        fi
     else
-        log "target-path already present in ${DBT_PROJECT_FILE}"
+        log "yq command not found" "ERROR"
+        exit 1
     fi
 else
     log "dbt_project.yml not found in repo root" "WARN"
diff --git a/build.sh b/build.sh
index 01a837d..b0c99e0 100644
--- a/build.sh
+++ b/build.sh
@@ -11,11 +11,17 @@ catch() {
 }
 trap 'catch $? $LINENO' EXIT
 
-init_version="v1.0.8"
+init_version="v0.1.3"
+
+# docker buildx build . \
+#   --pull \
+#   --tag europe-central2-docker.pkg.dev/fast-bi-common/bi-platform/tsb-redata-core:${init_version} \
+#   --platform linux/amd64 \
+#   --push
 
 docker buildx build . \
   --pull \
-  --tag europe-central2-docker.pkg.dev/fast-bi-common/bi-platform/tsb-redata-core:${init_version} \
+  --tag 4fastbi/data-quality-core:${init_version} \
+  --tag 4fastbi/data-quality-core:latest \
   --platform linux/amd64 \
   --push
-
diff --git a/cron_redata.sh b/cron_redata.sh
index 405fccd..8dc7e97 100644
--- a/cron_redata.sh
+++ b/cron_redata.sh
@@ -95,13 +95,25 @@ log "Updating DBT catalog"
 
 # Check and update dbt_project.yml with target-path: "target" if necessary
 DBT_PROJECT_FILE="/data/dbt/${DBT_REPO_NAME}/dbt_project.yml"
-TARGET_PATH_LINE="target-path: \"target\""
 
-if ! grep -qF "${TARGET_PATH_LINE}" "${DBT_PROJECT_FILE}"; then
-    log "Adding target-path configuration to dbt_project.yml"
-    echo "${TARGET_PATH_LINE}" >> "${DBT_PROJECT_FILE}"
-else
-    log "target-path is already configured in dbt_project.yml"
+if [ -f "${DBT_PROJECT_FILE}" ]; then
+    if command -v yq &> /dev/null; then
+        TARGET_PATH_VALUE=$(yq -r '."target-path" // empty' "${DBT_PROJECT_FILE}" 2>/dev/null || echo "")
+
+        if [ -z "${TARGET_PATH_VALUE}" ] || [ "${TARGET_PATH_VALUE}" = "null" ] || [ "${TARGET_PATH_VALUE}" = "empty" ]; then
+            log "Adding target-path configuration to dbt_project.yml"
+            yq -Y '."target-path" //= "target"' "${DBT_PROJECT_FILE}" > /tmp/dbt_tmp.yml && mv /tmp/dbt_tmp.yml "${DBT_PROJECT_FILE}" \
+                || { log "Failed to add target-path to ${DBT_PROJECT_FILE}" "ERROR"; exit 1; }
+            log "Successfully added target-path to ${DBT_PROJECT_FILE}"
+        else
+            log "target-path is already configured in dbt_project.yml"
+        fi
+    else
+        log "yq command not found" "ERROR"
+        exit 1
+    fi
+else
+    log "dbt_project.yml not found at ${DBT_PROJECT_FILE}" "WARN"
 fi
 
 # Date calculations