Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 41 additions & 46 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@
# See README for operational details
##

# Top level build args
ARG build_for=linux/amd64

##
# base image (abstract)
# Base: system deps + Python + dbt + re_data (single stage for clarity and to ensure all tools in final image)
##
FROM --platform=$build_for python:3.11.11-slim-bullseye as base
FROM --platform=$build_for python:3.11.11-slim-bullseye AS base
LABEL maintainer=support@fast.bi

# System setup
# System packages (jq, git, gcloud, cron, etc.)
RUN apt-get update \
&& apt-get dist-upgrade -y \
&& apt-get install -y --no-install-recommends \
jq \
git \
ssh-client \
software-properties-common \
Expand All @@ -26,58 +26,53 @@ RUN apt-get update \
curl \
apt-transport-https \
gnupg \
cl-base64 \
cron
RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && apt-get update -y && apt-get install google-cloud-cli -y
RUN apt-get clean \
&& rm -rf \
/var/lib/apt/lists/* \
/tmp/* \
/var/tmp/*
coreutils \
cron \
&& echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \
&& curl -sSf https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - \
&& apt-get update -y \
&& apt-get install -y google-cloud-cli \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Env vars
# Python env
ENV PYTHONIOENCODING=utf-8
ENV LANG=C.UTF-8
ENV PYTHONWARNINGS=ignore
ENV RE_DATA_SEND_ANONYMOUS_USAGE_STATS=0
ENV DBT_WARN_ERROR_OPTIONS='{"exclude": ["*"]}'

# Update python
# Pin setuptools < 81 to ensure pkg_resources is available for re_data 0.11.0
RUN python -m pip install --upgrade pip "setuptools<81" wheel yq pytz pandas colorama --no-cache-dir
# Pin setuptools < 81 for re_data 0.11.0 (pkg_resources)
RUN python -m pip install --no-cache-dir --upgrade pip "setuptools<81" wheel

# Set up work directory
WORKDIR /usr/app/dbt/
# dbt adapters + re_data + yq (one layer for better cache)
# - re-data and the four dbt adapters are pinned to exact versions.
# - yq, pytz, pandas and colorama are NOT pinned, so their versions can drift
#   between builds. NOTE(review): consider pinning them as well.
# - yq is needed at runtime: api-entrypoint.sh and cron_redata.sh call
#   `yq -r` / `yq -Y` to read and patch dbt_project.yml (presumably the
#   jq-wrapper yq from PyPI, given the -Y flag -- confirm).
# - --no-cache-dir keeps pip's download cache out of the image layer.
RUN python -m pip install --no-cache-dir \
yq \
pytz \
pandas \
colorama \
re-data==0.11.0 \
dbt-bigquery==1.9.2 \
dbt-snowflake==1.9.4 \
dbt-redshift==1.9.5 \
dbt-fabric==1.9.6

##
# dbt packages layer - this will be cached
##
FROM base as dbt-packages
# Ensure setuptools with pkg_resources is installed before re_data
RUN python -m pip install --no-cache-dir "setuptools<81"
RUN python -m pip install --no-cache-dir dbt-bigquery==1.9.2
RUN python -m pip install --no-cache-dir dbt-snowflake==1.9.4
RUN python -m pip install --no-cache-dir dbt-redshift==1.9.5
RUN python -m pip install --no-cache-dir dbt-fabric==1.9.6
RUN python -m pip install --no-cache-dir re-data==0.11.0
# Symlinks for CLI
RUN ln -sf /usr/local/bin/dbt /usr/bin/dbt \
&& ln -sf /usr/local/bin/re_data /usr/bin/re_data

# Create symlinks for commands
RUN ln -s /usr/local/bin/dbt /usr/bin/dbt
RUN ln -s /usr/local/bin/re_data /usr/bin/re_data
# Verify jq (and other tools) are present so build fails if apt layer is cached wrong
RUN command -v jq >/dev/null 2>&1 || (echo "FATAL: jq not found in image" && exit 1)

WORKDIR /usr/app/dbt/

##
# Final image with scripts - this layer will be rebuilt when scripts change
# Final: add scripts only (rebuild when scripts change)
##
FROM dbt-packages as dbt-bigquery-re-data
FROM base AS final
LABEL maintainer=support@fast.bi

# Copy scripts at the end so only this layer is rebuilt when scripts change
COPY ./api-entrypoint.sh /usr/app/dbt/
COPY ./cron_redata.sh /usr/app/dbt/
COPY ./backfill_redata.sh /usr/app/dbt/

# Set permissions in a single layer
RUN chmod 755 /usr/app/dbt/api-entrypoint.sh \
&& chmod 755 /usr/app/dbt/cron_redata.sh \
&& chmod 755 /usr/app/dbt/backfill_redata.sh

ENV RE_DATA_SEND_ANONYMOUS_USAGE_STATS=0
# Copy the runtime scripts and mark them executable in the same layer.
# A separate `RUN chmod` would add a second layer holding another copy of the
# three files; COPY --chmod sets the mode while copying instead.
# COPY --chmod requires BuildKit, which build.sh already uses
# (`docker buildx build`).
COPY --chmod=755 ./api-entrypoint.sh ./cron_redata.sh ./backfill_redata.sh /usr/app/dbt/

ENTRYPOINT ["/bin/bash", "-c", "/usr/app/dbt/api-entrypoint.sh" ]
ENTRYPOINT ["/bin/bash", "-c", "/usr/app/dbt/api-entrypoint.sh"]
37 changes: 12 additions & 25 deletions api-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,6 @@ catch() {

trap 'catch $? $LINENO' EXIT

# Disable re_data anonymous usage / Segment calls
export RE_DATA_SEND_ANONYMOUS_USAGE_STATS=0

# Create required directories
mkdir -p /data || {
echo "Failed to create /data directory" >&2
Expand Down Expand Up @@ -119,29 +116,19 @@ cd "/data/dbt/${DBT_REPO_NAME}" || { log "Failed to change to dbt directory" "ER
# Ensure dbt_project.yml has target-path for re_data compatibility
DBT_PROJECT_FILE="dbt_project.yml"
if [ -f "${DBT_PROJECT_FILE}" ]; then
if ! grep -q '^[[:space:]]*target-path:' "${DBT_PROJECT_FILE}"; then
log "Adding default target-path to ${DBT_PROJECT_FILE} for re_data compatibility"
python - << 'PY'
from pathlib import Path

try:
import yaml # type: ignore
except ImportError:
# Fallback: append a simple line if PyYAML is unavailable
path = Path("dbt_project.yml")
text = path.read_text()
if "target-path" not in text:
text = text.rstrip() + "\n\ntarget-path: target\n"
path.write_text(text)
else:
path = Path("dbt_project.yml")
data = yaml.safe_load(path.read_text()) or {}
if "target-path" not in data:
data["target-path"] = "target"
path.write_text(yaml.safe_dump(data, sort_keys=False))
PY
if command -v yq &> /dev/null; then
TARGET_PATH_VALUE=$(yq -r '."target-path" // empty' "${DBT_PROJECT_FILE}" 2>/dev/null || echo "")

# Add `target-path: target` to dbt_project.yml only when yq reported no value.
# The "null" / "empty" string comparisons are kept as defensive guards in case
# yq prints those literals instead of an empty string.
if [ -z "${TARGET_PATH_VALUE}" ] || [ "${TARGET_PATH_VALUE}" = "null" ] || [ "${TARGET_PATH_VALUE}" = "empty" ]; then
    log "Adding default target-path to ${DBT_PROJECT_FILE} for re_data compatibility"

    # Use a unique temp file: the previous fixed /tmp/dbt_tmp.yml name is also
    # used by cron_redata.sh, so two runs could overwrite each other's output.
    DBT_PROJECT_TMP=$(mktemp) || {
        log "Failed to create temporary file for ${DBT_PROJECT_FILE}" "ERROR"
        exit 1
    }

    # Only report success when both the yq rewrite and the copy-back worked.
    # A failure inside an `&&` list does not stop the script on its own, so the
    # old code logged "Successfully added" even when nothing was written.
    # cp (rather than mv) keeps the project file's existing mode and owner.
    if yq -Y '."target-path" //= "target"' "${DBT_PROJECT_FILE}" > "${DBT_PROJECT_TMP}" \
        && cp -- "${DBT_PROJECT_TMP}" "${DBT_PROJECT_FILE}"; then
        rm -f -- "${DBT_PROJECT_TMP}"
        log "Successfully added target-path to ${DBT_PROJECT_FILE}"
    else
        rm -f -- "${DBT_PROJECT_TMP}"
        log "Failed to add target-path to ${DBT_PROJECT_FILE}" "ERROR"
        exit 1
    fi
else
    log "target-path already present in ${DBT_PROJECT_FILE}"
fi
else
log "target-path already present in ${DBT_PROJECT_FILE}"
log "yq command not found" "ERROR"
exit 1
fi
else
log "dbt_project.yml not found in repo root" "WARN"
Expand Down
14 changes: 10 additions & 4 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,17 @@ catch() {
}
trap 'catch $? $LINENO' EXIT

init_version="v1.0.8"
init_version="v0.1.3"

# docker buildx build . \
# --pull \
# --tag europe-central2-docker.pkg.dev/fast-bi-common/bi-platform/tsb-redata-core:${init_version} \
# --platform linux/amd64 \
# --push

docker buildx build . \
--pull \
--tag europe-central2-docker.pkg.dev/fast-bi-common/bi-platform/tsb-redata-core:${init_version} \
--tag 4fastbi/data-quality-core:${init_version} \
--tag 4fastbi/data-quality-core:latest \
--platform linux/amd64 \
--push

--push
21 changes: 15 additions & 6 deletions cron_redata.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,22 @@ log "Updating DBT catalog"

# Check and update dbt_project.yml with target-path: "target" if necessary
DBT_PROJECT_FILE="/data/dbt/${DBT_REPO_NAME}/dbt_project.yml"
TARGET_PATH_LINE="target-path: \"target\""

if ! grep -qF "${TARGET_PATH_LINE}" "${DBT_PROJECT_FILE}"; then
log "Adding target-path configuration to dbt_project.yml"
echo "${TARGET_PATH_LINE}" >> "${DBT_PROJECT_FILE}"
else
log "target-path is already configured in dbt_project.yml"
# Make sure dbt_project.yml declares a target-path (re_data needs it).
# Nothing is done when the project file does not exist.
if [ -f "${DBT_PROJECT_FILE}" ]; then
    # yq is required both to read the current value and to patch the file.
    if ! command -v yq &> /dev/null; then
        log "yq command not found" "ERROR"
        exit 1
    fi

    # Empty output means the key is missing (or yq could not parse the file).
    TARGET_PATH_VALUE=$(yq -r '."target-path" // empty' "${DBT_PROJECT_FILE}" 2>/dev/null || echo "")

    # The "null" / "empty" string comparisons are kept as defensive guards in
    # case yq prints those literals instead of an empty string.
    if [ -z "${TARGET_PATH_VALUE}" ] || [ "${TARGET_PATH_VALUE}" = "null" ] || [ "${TARGET_PATH_VALUE}" = "empty" ]; then
        log "Adding target-path configuration to dbt_project.yml"

        # Use a unique temp file: the previous fixed /tmp/dbt_tmp.yml name is
        # also used by api-entrypoint.sh, so two runs could clash.
        DBT_PROJECT_TMP=$(mktemp) || {
            log "Failed to create temporary file for ${DBT_PROJECT_FILE}" "ERROR"
            exit 1
        }

        # Only report success when both the yq rewrite and the copy-back
        # worked. A failure inside an `&&` list does not stop the script on its
        # own, so the old code logged success even when nothing was written.
        # cp (rather than mv) keeps the project file's existing mode and owner.
        if yq -Y '."target-path" //= "target"' "${DBT_PROJECT_FILE}" > "${DBT_PROJECT_TMP}" \
            && cp -- "${DBT_PROJECT_TMP}" "${DBT_PROJECT_FILE}"; then
            rm -f -- "${DBT_PROJECT_TMP}"
            log "Successfully added target-path to ${DBT_PROJECT_FILE}"
        else
            rm -f -- "${DBT_PROJECT_TMP}"
            log "Failed to add target-path to ${DBT_PROJECT_FILE}" "ERROR"
            exit 1
        fi
    else
        log "target-path is already configured in dbt_project.yml"
    fi
fi

# Date calculations
Expand Down
Loading