From 8375cf1f9f6a15e10d147d6f15c1ebde6be472b0 Mon Sep 17 00:00:00 2001 From: Floris272 Date: Tue, 7 Oct 2025 15:37:29 +0200 Subject: [PATCH 1/7] :sparkles: [open-api-framework#188] add csv option to data dump script --- .github/workflows/bin-check.yml | 13 ++++++ bin/dump_data.sh | 79 +++++++++++++++++++++++---------- docs/manual/scripts.rst | 5 ++- 3 files changed, 73 insertions(+), 24 deletions(-) diff --git a/.github/workflows/bin-check.yml b/.github/workflows/bin-check.yml index dea2a029..cc7a1483 100644 --- a/.github/workflows/bin-check.yml +++ b/.github/workflows/bin-check.yml @@ -56,3 +56,16 @@ jobs: run: | createdb -h localhost -U postgres test psql -v ON_ERROR_STOP=1 -h localhost -U postgres -d test -f dump.sql + + - name: dump data to csv + run: | + mkdir csv_exports + SCRIPTPATH=bin DUMP_FILE=dump.sql bin/dump_data.sh --csv + + - name: validate csv dump + run: | + test -f csv_exports/core_object.csv || exit 1 + ! test -f csv_exports/auth_group.csv || exit 1 + grep "id,uuid,object_type_id,created_on,modified_on" csv_exports/core_object.csv + + diff --git a/bin/dump_data.sh b/bin/dump_data.sh index eba3f170..569894dc 100755 --- a/bin/dump_data.sh +++ b/bin/dump_data.sh @@ -10,7 +10,8 @@ # or --combined which appends the data dump to the schema dump. # The schema dump could not use -t to filter tables because this excludes extensions like postgis in the dump. # pg_dump also does not add related tables automatically, so `dump_data.sh` does not add related data from accounts to the dump. - +# +# with --csv and csv dump can be created for all tables in the given components. The csv files will be generated in CSV_OUTPUT_DIR (csv_exports by default). 
set -e @@ -28,31 +29,35 @@ SCRIPTPATH=$(dirname "$SCRIPT") ${SCRIPTPATH}/wait_for_db.sh DUMP_FILE=${DUMP_FILE:-"dump_$(date +'%Y-%m-%d_%H-%M-%S').sql"} +CSV_OUTPUT_DIR=${CSV_OUTPUT_DIR:-"csv_exports"} +CSV=false SCHEMA=true DATA=true COMBINED=false for arg in "$@"; do - case "$arg" in + case "$arg" in + --csv) CSV=true ;; --schema-only) DATA=false ;; - --data-only) SCHEMA=false ;; - --combined) COMBINED=true ;; + --data-only) SCHEMA=false ;; + --combined) COMBINED=true ;; --*) - echo "Unknown flag: $arg" - exit 1 - ;; + echo "Unknown flag: $arg" + exit 1 + ;; *) - APPS+=("$arg") ;; - esac + APPS+=("$arg") + ;; + esac done # export given apps or export DEFAULT_APPS if [ "${#APPS[@]}" -eq 0 ]; then - APPS=("${DEFAULT_APPS[@]}") + APPS=("${DEFAULT_APPS[@]}") fi ->&2 echo "exporting: ${APPS[*]}" +echo >&2 "exporting: ${APPS[*]}" # create -t flags for each app INCLUDES=() @@ -61,32 +66,60 @@ for app in "${APPS[@]}"; do done dump_schema() { - echo "Dumping schema to $1..." - pg_dump --schema-only -f "$1" + echo "Dumping schema to $1..." + pg_dump --schema-only -f "$1" } dump_data() { - echo "Dumping data to $1..." - pg_dump "${INCLUDES[@]}" --disable-triggers --data-only > "$1" + echo "Dumping data to $1..." + pg_dump "${INCLUDES[@]}" --disable-triggers --data-only >"$1" } append_data() { - echo "Appending data to $1..." - pg_dump "${INCLUDES[@]}" --disable-triggers --data-only \ - | sed '/^SET\|^SELECT pg_catalog.set_config/d' >> "$1" + echo "Appending data to $1..." + pg_dump "${INCLUDES[@]}" --disable-triggers --data-only | + sed '/^SET\|^SELECT pg_catalog.set_config/d' >>"$1" } +dump_csv() { + echo "Dumping data to csv..." + + WHERE_CLAUSE="" + for app in "${APPS[@]}"; do + if [ -n "$WHERE_CLAUSE" ]; then + WHERE_CLAUSE+=" OR " + fi + WHERE_CLAUSE+="tablename LIKE '${app}_%'" + done + + TABLES=$(psql -Atc "SELECT tablename FROM pg_tables WHERE schemaname='public' AND ($WHERE_CLAUSE);") + + for table in $TABLES; do + echo "dumping $table..." 
+ psql -c "\copy $table TO '$CSV_OUTPUT_DIR/$table.csv' WITH CSV HEADER" + done +} + +if $CSV; then + + if [ ! -d "$CSV_OUTPUT_DIR" ]; then + echo "csv output directory $CSV_OUTPUT_DIR does not exist in current path" + else + dump_csv + fi + exit 0 +fi if $COMBINED; then - dump_schema "$DUMP_FILE" - append_data "$DUMP_FILE" - exit 0 + dump_schema "$DUMP_FILE" + append_data "$DUMP_FILE" + exit 0 fi if $SCHEMA; then - dump_schema "schema__$DUMP_FILE" + dump_schema "schema__$DUMP_FILE" fi if $DATA; then - dump_data "data__$DUMP_FILE" + dump_data "data__$DUMP_FILE" fi diff --git a/docs/manual/scripts.rst b/docs/manual/scripts.rst index 3d00b244..1a8c5f67 100644 --- a/docs/manual/scripts.rst +++ b/docs/manual/scripts.rst @@ -7,7 +7,7 @@ Scripts Dump data --------- -Met het script ``dump_data.sh`` kan de data van alle componenten (core) worden geëxporteerd naar een sql bestand. +Met het script ``dump_data.sh`` kan de data van alle componenten (core) worden geëxporteerd naar een sql of csv bestand(en). Dit script is niet bedoeld voor een data migratie naar een andere Objects Api of Objecttypes Api instantie. @@ -23,6 +23,8 @@ Om alleen specifieke data te exporteren kunnen de gewenste component namen worde om een postgres 17 db te exporteren is de package postgres-client-17 vereist. +Met de flag ``--csv`` worden alle tabellen in de meegegeven componenten geëxporteerd naar een csv bestand. Deze bestanden worden in ``$CSV_EXPORT_DIR`` geplaatst. + Environment variabelen ---------------------- @@ -32,6 +34,7 @@ Environment variabelen * DB_NAME (objects/objecttypes) * DB_PASSWORD ("") * DUMP_FILE ("dump_$(date +'%Y-%m-%d_%H-%M-%S').sql") +* CSV_OUTPUT_DIR (csv_exports) .. 
code-block:: shell From 144633c3891ca51af9e4ad81996a6e6cbdd4cf8c Mon Sep 17 00:00:00 2001 From: Floris272 Date: Tue, 7 Oct 2025 15:43:45 +0200 Subject: [PATCH 2/7] :green_heart: [open-api-framework#188] fix dump data ci tests --- .github/workflows/bin-check.yml | 7 ++----- bin/dump_data.sh | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/bin-check.yml b/.github/workflows/bin-check.yml index cc7a1483..041c07b9 100644 --- a/.github/workflows/bin-check.yml +++ b/.github/workflows/bin-check.yml @@ -39,6 +39,8 @@ jobs: python src/manage.py migrate src/manage.py loaddata demodata SCRIPTPATH=bin DUMP_FILE=dump.sql bin/dump_data.sh --combined + mkdir csv_exports + SCRIPTPATH=bin DUMP_FILE=dump.sql bin/dump_data.sh --csv env: DB_PASSWORD: "" DB_USER: postgres @@ -57,11 +59,6 @@ jobs: createdb -h localhost -U postgres test psql -v ON_ERROR_STOP=1 -h localhost -U postgres -d test -f dump.sql - - name: dump data to csv - run: | - mkdir csv_exports - SCRIPTPATH=bin DUMP_FILE=dump.sql bin/dump_data.sh --csv - - name: validate csv dump run: | test -f csv_exports/core_object.csv || exit 1 diff --git a/bin/dump_data.sh b/bin/dump_data.sh index 569894dc..78235f9d 100755 --- a/bin/dump_data.sh +++ b/bin/dump_data.sh @@ -11,7 +11,7 @@ # The schema dump could not use -t to filter tables because this excludes extensions like postgis in the dump. # pg_dump also does not add related tables automatically, so `dump_data.sh` does not add related data from accounts to the dump. # -# with --csv and csv dump can be created for all tables in the given components. The csv files will be generated in CSV_OUTPUT_DIR (csv_exports by default). +# with --csv a csv dump can be created for all tables in the given components. The csv files will be generated in CSV_OUTPUT_DIR (csv_exports by default). 
set -e From f8eb6571b4b416720ae883a5eb539ff5f849ac10 Mon Sep 17 00:00:00 2001 From: Floris272 Date: Tue, 14 Oct 2025 11:33:57 +0200 Subject: [PATCH 3/7] :sparkles: [open-api-framework#188] combine csv dump into zip archive --- .github/workflows/bin-check.yml | 10 +++++----- bin/dump_data.sh | 13 +++++++++---- csv_dumps/.gitkeep | 0 docs/manual/scripts.rst | 4 ++-- 4 files changed, 16 insertions(+), 11 deletions(-) create mode 100644 csv_dumps/.gitkeep diff --git a/.github/workflows/bin-check.yml b/.github/workflows/bin-check.yml index 041c07b9..f913358f 100644 --- a/.github/workflows/bin-check.yml +++ b/.github/workflows/bin-check.yml @@ -39,8 +39,7 @@ jobs: python src/manage.py migrate src/manage.py loaddata demodata SCRIPTPATH=bin DUMP_FILE=dump.sql bin/dump_data.sh --combined - mkdir csv_exports - SCRIPTPATH=bin DUMP_FILE=dump.sql bin/dump_data.sh --csv + SCRIPTPATH=bin ZIP_FILE=dump.zip bin/dump_data.sh --csv env: DB_PASSWORD: "" DB_USER: postgres @@ -61,8 +60,9 @@ jobs: - name: validate csv dump run: | - test -f csv_exports/core_object.csv || exit 1 - ! test -f csv_exports/auth_group.csv || exit 1 - grep "id,uuid,object_type_id,created_on,modified_on" csv_exports/core_object.csv + unzip dump.zip + test -f core_object.csv || exit 1 + ! test -f auth_group.csv || exit 1 + grep "id,uuid,object_type_id,created_on,modified_on" core_object.csv diff --git a/bin/dump_data.sh b/bin/dump_data.sh index 78235f9d..0998a082 100755 --- a/bin/dump_data.sh +++ b/bin/dump_data.sh @@ -11,7 +11,8 @@ # The schema dump could not use -t to filter tables because this excludes extensions like postgis in the dump. # pg_dump also does not add related tables automatically, so `dump_data.sh` does not add related data from accounts to the dump. # -# with --csv a csv dump can be created for all tables in the given components. The csv files will be generated in CSV_OUTPUT_DIR (csv_exports by default). +# with --csv a csv dump can be created for all tables in the given components. 
The csv files will be generated in csv_dumps +# temporarily and combined into a single ZIP archive. set -e @@ -28,8 +29,10 @@ SCRIPTPATH=$(dirname "$SCRIPT") ${SCRIPTPATH}/wait_for_db.sh -DUMP_FILE=${DUMP_FILE:-"dump_$(date +'%Y-%m-%d_%H-%M-%S').sql"} -CSV_OUTPUT_DIR=${CSV_OUTPUT_DIR:-"csv_exports"} +DEFAULT_FILE_NAME="dump_$(date +'%Y-%m-%d_%H-%M-%S')" +DUMP_FILE=${DUMP_FILE:-"$DEFAULT_FILE_NAME.sql"} +ZIP_FILE=${ZIP_FILE:-"$DEFAULT_FILE_NAME.zip"} +CSV_OUTPUT_DIR="csv_dumps" CSV=false SCHEMA=true @@ -98,10 +101,12 @@ dump_csv() { echo "dumping $table..." psql -c "\copy $table TO '$CSV_OUTPUT_DIR/$table.csv' WITH CSV HEADER" done + + zip -j "$ZIP_FILE" "$CSV_OUTPUT_DIR"/*.csv + rm -f "$CSV_OUTPUT_DIR"/*.csv } if $CSV; then - if [ ! -d "$CSV_OUTPUT_DIR" ]; then echo "csv output directory $CSV_OUTPUT_DIR does not exist in current path" else diff --git a/csv_dumps/.gitkeep b/csv_dumps/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/docs/manual/scripts.rst b/docs/manual/scripts.rst index 1a8c5f67..958268a2 100644 --- a/docs/manual/scripts.rst +++ b/docs/manual/scripts.rst @@ -23,7 +23,7 @@ Om alleen specifieke data te exporteren kunnen de gewenste component namen worde om een postgres 17 db te exporteren is de package postgres-client-17 vereist. -Met de flag ``--csv`` worden alle tabellen in de meegegeven componenten geëxporteerd naar een csv bestand. Deze bestanden worden in ``$CSV_EXPORT_DIR`` geplaatst. +Met de flag ``--csv`` worden alle tabellen in de meegegeven componenten geëxporteerd naar csv bestanden. Deze bestanden worden tijdelijk in ``csv_dumps`` geplaatst en gecombineerd in een ZIP bestand. Environment variabelen ---------------------- @@ -34,7 +34,7 @@ Environment variabelen * DB_NAME (objects/objecttypes) * DB_PASSWORD ("") * DUMP_FILE ("dump_$(date +'%Y-%m-%d_%H-%M-%S').sql") -* CSV_OUTPUT_DIR (csv_exports) +* CSV_FILE ("dump_$(date +'%Y-%m-%d_%H-%M-%S').zip") .. 
code-block:: shell From 8862f7e6d441a21f6a3a2413f82d062dfb38dad5 Mon Sep 17 00:00:00 2001 From: Floris272 Date: Tue, 21 Oct 2025 13:42:54 +0200 Subject: [PATCH 4/7] :recycle: [open-api-framework#188] create and remove temp dir in script --- bin/dump_data.sh | 13 +++++-------- csv_dumps/.gitkeep | 0 2 files changed, 5 insertions(+), 8 deletions(-) delete mode 100644 csv_dumps/.gitkeep diff --git a/bin/dump_data.sh b/bin/dump_data.sh index 0998a082..9e699e26 100755 --- a/bin/dump_data.sh +++ b/bin/dump_data.sh @@ -11,8 +11,8 @@ # The schema dump could not use -t to filter tables because this excludes extensions like postgis in the dump. # pg_dump also does not add related tables automatically, so `dump_data.sh` does not add related data from accounts to the dump. # -# with --csv a csv dump can be created for all tables in the given components. The csv files will be generated in csv_dumps -# temporarily and combined into a single ZIP archive. +# with --csv a csv dump can be created for all tables in the given components. The csv files will be generated in the temporary directory csv_dumps +# and combined into a single ZIP archive csv_dumps. set -e @@ -85,6 +85,7 @@ append_data() { } dump_csv() { + mkdir -p $CSV_OUTPUT_DIR echo "Dumping data to csv..." WHERE_CLAUSE="" @@ -103,15 +104,11 @@ dump_csv() { done zip -j "$ZIP_FILE" "$CSV_OUTPUT_DIR"/*.csv - rm -f "$CSV_OUTPUT_DIR"/*.csv + rm -rf "$CSV_OUTPUT_DIR" } if $CSV; then - if [ ! 
-d "$CSV_OUTPUT_DIR" ]; then - echo "csv output directory $CSV_OUTPUT_DIR does not exist in current path" - else - dump_csv - fi + dump_csv exit 0 fi diff --git a/csv_dumps/.gitkeep b/csv_dumps/.gitkeep deleted file mode 100644 index e69de29b..00000000 From 46f6e4c5684da1e19f81017432468bf43ce8ee97 Mon Sep 17 00:00:00 2001 From: floris272 Date: Fri, 24 Oct 2025 17:11:15 +0200 Subject: [PATCH 5/7] :recycle: [open-api-framework#188] change zip to tar --- .github/workflows/bin-check.yml | 4 ++-- bin/dump_data.sh | 6 +++--- docs/manual/scripts.rst | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/bin-check.yml b/.github/workflows/bin-check.yml index f913358f..5ca17b0d 100644 --- a/.github/workflows/bin-check.yml +++ b/.github/workflows/bin-check.yml @@ -39,7 +39,7 @@ jobs: python src/manage.py migrate src/manage.py loaddata demodata SCRIPTPATH=bin DUMP_FILE=dump.sql bin/dump_data.sh --combined - SCRIPTPATH=bin ZIP_FILE=dump.zip bin/dump_data.sh --csv + SCRIPTPATH=bin TAR_FILE=dump.tar bin/dump_data.sh --csv env: DB_PASSWORD: "" DB_USER: postgres @@ -60,7 +60,7 @@ jobs: - name: validate csv dump run: | - unzip dump.zip + tar -xf dump.tar test -f core_object.csv || exit 1 ! test -f auth_group.csv || exit 1 grep "id,uuid,object_type_id,created_on,modified_on" core_object.csv diff --git a/bin/dump_data.sh b/bin/dump_data.sh index 9e699e26..6f3229da 100755 --- a/bin/dump_data.sh +++ b/bin/dump_data.sh @@ -12,7 +12,7 @@ # pg_dump also does not add related tables automatically, so `dump_data.sh` does not add related data from accounts to the dump. # # with --csv a csv dump can be created for all tables in the given components. The csv files will be generated in the temporary directory csv_dumps -# and combined into a single ZIP archive csv_dumps. +# and combined into a single TAR archive (TAR_FILE). 
set -e @@ -31,7 +31,7 @@ ${SCRIPTPATH}/wait_for_db.sh DEFAULT_FILE_NAME="dump_$(date +'%Y-%m-%d_%H-%M-%S')" DUMP_FILE=${DUMP_FILE:-"$DEFAULT_FILE_NAME.sql"} -ZIP_FILE=${ZIP_FILE:-"$DEFAULT_FILE_NAME.zip"} +TAR_FILE=${TAR_FILE:-"$DEFAULT_FILE_NAME.tar"} CSV_OUTPUT_DIR="csv_dumps" CSV=false @@ -103,7 +103,7 @@ dump_csv() { psql -c "\copy $table TO '$CSV_OUTPUT_DIR/$table.csv' WITH CSV HEADER" done - zip -j "$ZIP_FILE" "$CSV_OUTPUT_DIR"/*.csv + tar -cf "$TAR_FILE" -C "$CSV_OUTPUT_DIR" . rm -rf "$CSV_OUTPUT_DIR" } diff --git a/docs/manual/scripts.rst b/docs/manual/scripts.rst index 958268a2..2ae6d0cb 100644 --- a/docs/manual/scripts.rst +++ b/docs/manual/scripts.rst @@ -17,13 +17,13 @@ Om alleen specifieke data te exporteren kunnen de gewenste component namen worde .. code-block:: shell - ./dump_data.sh core + /dump_data.sh core .. note:: om een postgres 17 db te exporteren is de package postgres-client-17 vereist. -Met de flag ``--csv`` worden alle tabellen in de meegegeven componenten geëxporteerd naar csv bestanden. Deze bestanden worden tijdelijk in ``csv_dumps`` geplaatst en gecombineerd in een ZIP bestand. +Met de flag ``--csv`` worden alle tabellen in de meegegeven componenten geëxporteerd naar csv bestanden. Deze bestanden worden tijdelijk in ``csv_dumps`` geplaatst en gecombineerd in een TAR bestand. Environment variabelen ---------------------- @@ -34,7 +34,7 @@ Environment variabelen * DB_NAME (objects/objecttypes) * DB_PASSWORD ("") * DUMP_FILE ("dump_$(date +'%Y-%m-%d_%H-%M-%S').sql") -* CSV_FILE ("dump_$(date +'%Y-%m-%d_%H-%M-%S').zip") +* TAR_FILE ("dump_$(date +'%Y-%m-%d_%H-%M-%S').tar") .. 
code-block:: shell From 1c60dd34c82b995b7ca79257f3b7a60fcc582092 Mon Sep 17 00:00:00 2001 From: floris272 Date: Tue, 4 Nov 2025 17:29:34 +0100 Subject: [PATCH 6/7] :construction_worker: [open-api-framework#188] upgrade to debian slim-trixie --- Dockerfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 948f1dd5..500c298c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Stage 1 - Compile needed python dependencies -FROM python:3.12-slim-bookworm AS backend-build +FROM python:3.12-slim-trixie AS backend-build RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \ pkg-config \ @@ -7,8 +7,8 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-reco git \ libpq-dev \ # required for (log) routing support in uwsgi - libpcre3 \ - libpcre3-dev \ + libpcre2-8-0 \ + libpcre2-dev \ && rm -rf /var/lib/apt/lists/* @@ -36,7 +36,7 @@ RUN npm run build # Stage 3 - Build docker image suitable for execution and deployment -FROM python:3.12-slim-bookworm AS production +FROM python:3.12-slim-trixie AS production # Stage 3.1 - Set up the needed production dependencies # install all the dependencies for GeoDjango @@ -47,7 +47,7 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-reco gdal-bin \ libgdal-dev \ gettext \ - libpcre3 \ + libpcre2-8-0 \ && rm -rf /var/lib/apt/lists/* RUN pip install pip "setuptools>=70.0.0" From df507e566dbc4fe6318e73018060931d226db195 Mon Sep 17 00:00:00 2001 From: floris272 Date: Wed, 5 Nov 2025 11:01:30 +0100 Subject: [PATCH 7/7] :arrow_up: [open-api-framework#188] upgrade uwsgi --- requirements/base.txt | 2 +- requirements/ci.txt | 2 +- requirements/dev.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index 2701a921..1934e38c 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -368,7 +368,7 @@ urllib3==2.5.0 # elastic-apm 
# requests # sentry-sdk -uwsgi==2.0.23 +uwsgi==2.0.31 # via open-api-framework vine==5.1.0 # via diff --git a/requirements/ci.txt b/requirements/ci.txt index 247021ff..d2f86879 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -741,7 +741,7 @@ urllib3==2.5.0 # requests # sentry-sdk # vcrpy -uwsgi==2.0.23 +uwsgi==2.0.31 # via # -c requirements/base.txt # -r requirements/base.txt diff --git a/requirements/dev.txt b/requirements/dev.txt index be6f2bab..132bcca6 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -933,7 +933,7 @@ urllib3==2.5.0 # requests # sentry-sdk # vcrpy -uwsgi==2.0.23 +uwsgi==2.0.31 # via # -c requirements/ci.txt # -r requirements/ci.txt