From cc28fba31e670a03593d8d7644a513cae054f98d Mon Sep 17 00:00:00 2001 From: Rub21 Date: Thu, 7 Nov 2024 12:49:59 -0500 Subject: [PATCH 1/4] Update livenessProbe for replication job --- osm-seed/templates/jobs/replication-job-deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/osm-seed/templates/jobs/replication-job-deployment.yaml b/osm-seed/templates/jobs/replication-job-deployment.yaml index fe2ea3a8..1b1ed516 100644 --- a/osm-seed/templates/jobs/replication-job-deployment.yaml +++ b/osm-seed/templates/jobs/replication-job-deployment.yaml @@ -28,9 +28,9 @@ spec: - /bin/bash - -c - /liveness.sh - initialDelaySeconds: 10 + initialDelaySeconds: 60 timeoutSeconds: 5 - periodSeconds: 10 + periodSeconds: 30 failureThreshold: 3 {{- if .Values.replicationJob.resources.enabled }} resources: From 6ffb71d7a926836c6c8a0368bc1342c30cc21283 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Thu, 7 Nov 2024 15:49:46 -0500 Subject: [PATCH 2/4] Update script for minute replication file process --- images/replication-job/start.sh | 59 +++++++++++++++++---------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/images/replication-job/start.sh b/images/replication-job/start.sh index 5653eb7a..903af195 100755 --- a/images/replication-job/start.sh +++ b/images/replication-job/start.sh @@ -49,6 +49,10 @@ function get_current_state_file() { --file $workingDirectory/state.txt --query="name" fi fi + else + echo "File $workingDirectory/state.txt exist in local storage" + echo "File $workingDirectory/state.txt content:" + cat $workingDirectory/state.txt fi } @@ -71,29 +75,43 @@ function upload_file_cloud() { } function monitor_minute_replication() { - # Function to handle continuous monitoring, minutminutes replication and upload to cloud provider - # Directory to store a log of processed files + # Function to handle continuous monitoring, minute replication, and sequential upload to cloud provider + # Directory to store a log of the last processed file processed_files_log="$workingDirectory/processed_files.log" max_log_size_mb=1 - while true; do - upload_file_cloud /mnt/data/state.txt - sleep 60s - done & - while true; do if [ -e "$processed_files_log" ]; then log_size=$(du -m "$processed_files_log" | cut -f1) if [ "$log_size" -gt "$max_log_size_mb" ]; then echo $(date +%F_%H:%M:%S)": Cleaning processed_files_log..." >"$processed_files_log" fi - for local_minute_file in $(find $workingDirectory/ -cmin -1); do + # Find new .gz files created within the last minute + for local_minute_file in $(find $workingDirectory/ -name "*.gz" -cmin -1); do if [ -f "$local_minute_file" ]; then - if grep -q "$local_minute_file" "$processed_files_log"; then - continue + echo "Processing $local_minute_file..." + # Ensure the file is uploaded only once + if ! grep -q "$local_minute_file: SUCCESS" "$processed_files_log" && ! grep -q "$local_minute_file: FAILURE" "$processed_files_log"; then + # Verify gz file integrity + if gzip -t "$local_minute_file" 2>/dev/null; then + # Upload the file sequentially + upload_file_cloud $local_minute_file + echo "$local_minute_file: SUCCESS" >>"$processed_files_log" + # Upload and update state.txt after successful upload + upload_file_cloud "$workingDirectory/state.txt" + else + echo $(date +%F_%H:%M:%S)": $local_minute_file is corrupted and will not be uploaded." >>"$processed_files_log" + echo "$local_minute_file: FAILURE" >>"$processed_files_log" + # Ensure state.txt maintains the current ID to regenerate the corrupted file + current_state_id=$(( $(echo "$local_minute_file" | sed 's/[^0-9]//g' | sed 's/^0*//') - 1 )) + sed -i "s/sequenceNumber=.*/sequenceNumber=$current_state_id/" "$workingDirectory/state.txt" + rm "$local_minute_file" + echo "Stopping any existing Osmosis processes..." + pkill -f "osmosis.*--replicate-apidb" + echo "Regenerating $local_minute_file..." + generate_replication + fi fi - upload_file_cloud $local_minute_file - echo "$local_minute_file" >>"$processed_files_log" fi done else @@ -120,23 +138,6 @@ function generate_replication() { workingDirectory=$workingDirectory } -# function start_nginx() { -# if [ "$STAR_NGINX_SERVER" = "true" ]; then -# echo 'server { -# listen 8080; -# server_name localhost; - -# location / { -# root /mnt/data; -# index index.html; -# } -# }' >/etc/nginx/nginx.conf -# service nginx restart -# else -# echo "STAR_NGINX_SERVER is either not set or not set to true." -# fi -# } - ######################## Start minutes replication process ######################## get_current_state_file flag=true From feeb6f1f210efd65783ab16e3e6f1bd31a131192 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Fri, 8 Nov 2024 09:55:05 -0500 Subject: [PATCH 3/4] Update replication script to send message to slack --- images/replication-job/Dockerfile | 3 +- images/replication-job/start.sh | 30 +++++++++++++++++++ images/web/Dockerfile | 2 +- .../jobs/replication-job-deployment.yaml | 6 ++++ osm-seed/values.yaml | 3 ++ 5 files changed, 42 insertions(+), 2 deletions(-) diff --git a/images/replication-job/Dockerfile b/images/replication-job/Dockerfile index 992421f8..843d7c1a 100644 --- a/images/replication-job/Dockerfile +++ b/images/replication-job/Dockerfile @@ -5,7 +5,8 @@ RUN apt-get update && \ nginx \ python3-pip \ python3-venv \ - procps && \ + procps \ + curl && \ rm -rf /var/lib/apt/lists/* RUN python3 -m venv /opt/venv diff --git a/images/replication-job/start.sh b/images/replication-job/start.sh index 903af195..30453c14 100755 --- a/images/replication-job/start.sh +++ b/images/replication-job/start.sh @@ -8,6 +8,9 @@ else echo JAVACMD_OPTIONS=\"-server -Xmx$memory\" >~/.osmosis fi +slack_message_count=0 +max_slack_messages=2 + workingDirectory="/mnt/data" mkdir -p $workingDirectory @@ -74,6 +77,32 @@ function upload_file_cloud() { fi } +function send_slack_message() { + # Check if Slack messaging is enabled + if [ "${ENABLE_SEND_SLACK_MESSAGE}" != "true" ]; then + echo "Slack messaging is disabled. Set ENABLE_SEND_SLACK_MESSAGE to true to enable." + return + fi + + # Check if the Slack webhook URL is set + if [ -z "${SLACK_WEBHOOK_URL}" ]; then + echo "SLACK_WEBHOOK_URL is not set. Unable to send message to Slack." + return 1 + fi + + # Limit Slack message count to 3 + if [ "$slack_message_count" -ge "$max_slack_messages" ]; then + echo "Max Slack messages limit reached. No further messages will be sent." + return + fi + + local message="$1" + curl -X POST -H 'Content-type: application/json' --data "{\"text\": \"$message\"}" "$SLACK_WEBHOOK_URL" + echo "Message sent to Slack: $message" + slack_message_count=$((slack_message_count + 1)) +} + + function monitor_minute_replication() { # Function to handle continuous monitoring, minute replication, and sequential upload to cloud provider # Directory to store a log of the last processed file @@ -109,6 +138,7 @@ function monitor_minute_replication() { echo "Stopping any existing Osmosis processes..." pkill -f "osmosis.*--replicate-apidb" echo "Regenerating $local_minute_file..." + send_slack_message "${ENVIROMENT}: Corrupted file $local_minute_file detected. Regenerating the file..." generate_replication fi fi diff --git a/images/web/Dockerfile b/images/web/Dockerfile index 927e49da..c6ce857c 100644 --- a/images/web/Dockerfile +++ b/images/web/Dockerfile @@ -50,7 +50,7 @@ RUN npm install -g svgo RUN rm -rf $workdir/html ## Sep 2023 -ENV OPENSTREETMAP_WEBSITE_GITSHA=d23763d6cdbf5ec11f0e83f8e6e8fb32ed973e6a +ENV OPENSTREETMAP_WEBSITE_GITSHA=1ad7f22a04465c32ce82ffaac3aaee0df119648b RUN curl -L https://github.com/openstreetmap/openstreetmap-website/archive/$OPENSTREETMAP_WEBSITE_GITSHA.zip --output website.zip && unzip website.zip RUN mv openstreetmap-website-$OPENSTREETMAP_WEBSITE_GITSHA/* $workdir/ WORKDIR $workdir diff --git a/osm-seed/templates/jobs/replication-job-deployment.yaml b/osm-seed/templates/jobs/replication-job-deployment.yaml index 1b1ed516..5c18bdeb 100644 --- a/osm-seed/templates/jobs/replication-job-deployment.yaml +++ b/osm-seed/templates/jobs/replication-job-deployment.yaml @@ -78,6 +78,12 @@ spec: - name: MEMORY_JAVACMD_OPTIONS value: {{ .Values.replicationJob.resources.requests.memory | default "2Gi" | quote}} {{- end }} + - name: ENABLE_SEND_SLACK_MESSAGE + value: {{ .Values.replicationJob.env.ENABLE_SEND_SLACK_MESSAGE | quote}} + - name: SLACK_WEBHOOK_URL + value: {{ .Values.replicationJob.env.SLACK_WEBHOOK_URL | quote}} + - name: ENVIROMENT + value: {{ .Values.environment | quote}} {{- if .Values.replicationJob.nodeSelector.enabled }} nodeSelector: {{ .Values.replicationJob.nodeSelector.label_key }} : {{ .Values.replicationJob.nodeSelector.label_value }} diff --git a/osm-seed/values.yaml b/osm-seed/values.yaml index b30717bf..2a345d6d 100644 --- a/osm-seed/values.yaml +++ b/osm-seed/values.yaml @@ -293,6 +293,9 @@ replicationJob: image: name: "" tag: "" + env: + ENABLE_SEND_SLACK_MESSAGE: "false" + SLACK_WEBHOOK_URL: "null" resources: enabled: false requests: From bae7878126f413fcdbf6451b45f1431ee58a1fc4 Mon Sep 17 00:00:00 2001 From: Rub21 Date: Fri, 8 Nov 2024 10:02:41 -0500 Subject: [PATCH 4/4] Reset gitsha for web api --- images/web/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/images/web/Dockerfile b/images/web/Dockerfile index c6ce857c..927e49da 100644 --- a/images/web/Dockerfile +++ b/images/web/Dockerfile @@ -50,7 +50,7 @@ RUN npm install -g svgo RUN rm -rf $workdir/html ## Sep 2023 -ENV OPENSTREETMAP_WEBSITE_GITSHA=1ad7f22a04465c32ce82ffaac3aaee0df119648b +ENV OPENSTREETMAP_WEBSITE_GITSHA=d23763d6cdbf5ec11f0e83f8e6e8fb32ed973e6a RUN curl -L https://github.com/openstreetmap/openstreetmap-website/archive/$OPENSTREETMAP_WEBSITE_GITSHA.zip --output website.zip && unzip website.zip RUN mv openstreetmap-website-$OPENSTREETMAP_WEBSITE_GITSHA/* $workdir/ WORKDIR $workdir