diff --git a/.github/workflows/Dev_CD.yml b/.github/workflows/Dev_CD.yml
index e9a4e830..1aae3280 100644
--- a/.github/workflows/Dev_CD.yml
+++ b/.github/workflows/Dev_CD.yml
@@ -59,7 +59,7 @@ jobs:
         uses: actions/download-artifact@v4
         with:
           name: app-artifact
-          path: ~/app
+          path: ~/app/staging
 
       - name: Download deploy scripts
         uses: actions/download-artifact@v4
@@ -67,11 +67,17 @@
           name: deploy-scripts
           path: ~/app/scripts/
 
-      - name: Replace application to latest
-        run: sudo sh ~/app/scripts/replace-new-version.sh
+      - name: Setup log directory
+        run: |
+          sudo mkdir -p /home/ubuntu/logs
+          sudo chown -R ubuntu:ubuntu /home/ubuntu/logs
+          chmod 755 /home/ubuntu/logs
+
+      - name: Make deploy script executable
+        run: chmod +x ~/app/scripts/zero-downtime-deploy.sh
 
-      - name: Health Check
-        run: sh ~/app/scripts/health-check.sh
+      - name: Zero Downtime Deployment
+        run: sh ~/app/scripts/zero-downtime-deploy.sh
 
       - name: Send Discord Alert on Failure
         if: failure()
diff --git a/.github/workflows/Prod_CD.yml b/.github/workflows/Prod_CD.yml
index cb10ee58..dad56d3a 100644
--- a/.github/workflows/Prod_CD.yml
+++ b/.github/workflows/Prod_CD.yml
@@ -59,7 +59,7 @@ jobs:
         uses: actions/download-artifact@v4
         with:
           name: app-artifact
-          path: ~/app
+          path: ~/app/staging
 
       - name: Download deploy scripts
         uses: actions/download-artifact@v4
@@ -67,11 +67,17 @@
           name: deploy-scripts
           path: ~/app/scripts/
 
-      - name: Replace application to latest
-        run: sudo sh ~/app/scripts/replace-new-version.sh
+      - name: Setup log directory
+        run: |
+          sudo mkdir -p /home/ubuntu/logs
+          sudo chown -R ubuntu:ubuntu /home/ubuntu/logs
+          chmod 755 /home/ubuntu/logs
+
+      - name: Make deploy script executable
+        run: chmod +x ~/app/scripts/zero-downtime-deploy.sh
 
-      - name: Health Check
-        run: sh ~/app/scripts/health-check.sh
+      - name: Zero Downtime Deployment
+        run: sh ~/app/scripts/zero-downtime-deploy.sh
 
       - name: Send Discord Alert on Failure
         if: failure()
diff --git a/.gitignore b/.gitignore
index 671e0e9f..7d690374 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,3 +41,4 @@ out/
 
 ### application-local.yml
 /src/main/resources/application-local.yml
+.serena
diff --git a/nginx/api.dev.debate-timer.com b/nginx/api.dev.debate-timer.com
new file mode 100644
index 00000000..5b775735
--- /dev/null
+++ b/nginx/api.dev.debate-timer.com
@@ -0,0 +1,34 @@
+upstream debate_timer_backend {
+    server 127.0.0.1:8080;
+    keepalive 32;
+}
+
+server {
+    server_name api.dev.debate-timer.com;
+
+    location / {
+        proxy_pass http://debate_timer_backend;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+
+    listen [::]:443 ssl ipv6only=on; # managed by Certbot
+    listen 443 ssl; # managed by Certbot
+    ssl_certificate /etc/letsencrypt/live/api.dev.debate-timer.com/fullchain.pem; # managed by Certbot
+    ssl_certificate_key /etc/letsencrypt/live/api.dev.debate-timer.com/privkey.pem; # managed by Certbot
+    include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
+    ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
+}
+
+server {
+    if ($host = api.dev.debate-timer.com) {
+        return 308 https://$host$request_uri;
+    } # managed by Certbot
+
+    listen 80;
+    listen [::]:80;
+    server_name api.dev.debate-timer.com;
+    return 404; # managed by Certbot
+}
diff --git a/nginx/api.prod.debate-timer.com b/nginx/api.prod.debate-timer.com
new file mode 100644
index 00000000..efa873fe
--- /dev/null
+++ b/nginx/api.prod.debate-timer.com
@@ -0,0 +1,34 @@
+upstream debate_timer_backend {
+    server 127.0.0.1:8080;
+    keepalive 32;
+}
+
+server {
+    server_name api.prod.debate-timer.com;
+
+    location / {
+        proxy_pass http://debate_timer_backend;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+
+    listen [::]:443 ssl ipv6only=on; # managed by Certbot
+    listen 443 ssl; # managed by Certbot
+    ssl_certificate /etc/letsencrypt/live/api.prod.debate-timer.com/fullchain.pem; # managed by Certbot
+    ssl_certificate_key /etc/letsencrypt/live/api.prod.debate-timer.com/privkey.pem; # managed by Certbot
+    include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
+    ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
+}
+
+server {
+    if ($host = api.prod.debate-timer.com) {
+        return 308 https://$host$request_uri;
+    } # managed by Certbot
+
+    listen 80;
+    listen [::]:80;
+    server_name api.prod.debate-timer.com;
+    return 404; # managed by Certbot
+}
diff --git a/scripts/dev/zero-downtime-deploy.sh b/scripts/dev/zero-downtime-deploy.sh
new file mode 100644
index 00000000..992252c3
--- /dev/null
+++ b/scripts/dev/zero-downtime-deploy.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+
+set -e
+
+APP_DIR="/home/ubuntu/app"
+PORT_FILE="$APP_DIR/current_port.txt"
+LOG_FILE="$APP_DIR/deploy.log"
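+# Blue-green deployment: traffic alternates between the blue (8080) and green
+# (8081) instances; each exposes its health endpoint on a dedicated monitor port.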
+BLUE_PORT=8080
+GREEN_PORT=8081
+BLUE_MONITOR_PORT=8083
+GREEN_MONITOR_PORT=8084
+MAX_HEALTH_CHECK_RETRIES=60
+HEALTH_CHECK_INTERVAL=2
+PROFILE="dev"
+TIMEZONE="Asia/Seoul"
+
+log() {
+    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
+    echo "${timestamp} $@" | tee -a "$LOG_FILE"
+}
+
+error_exit() {
+    log "$1"
+    exit 1
+}
+
+get_current_port() {
+    if [ ! -f "$PORT_FILE" ]; then
+        # Log to stderr so command substitution captures only the port number
+        log "Port file not found. Initializing with default port $BLUE_PORT" >&2
+        echo "$BLUE_PORT" > "$PORT_FILE"
+        echo "$BLUE_PORT"
+    else
+        cat "$PORT_FILE"
+    fi
+}
+
+get_inactive_port() {
+    local current_port=$1
+    if [ "$current_port" -eq "$BLUE_PORT" ]; then
+        echo "$GREEN_PORT"
+    else
+        echo "$BLUE_PORT"
+    fi
+}
+
+get_monitor_port() {
+    local app_port=$1
+    if [ "$app_port" -eq "$BLUE_PORT" ]; then
+        echo "$BLUE_MONITOR_PORT"
+    else
+        echo "$GREEN_MONITOR_PORT"
+    fi
+}
+
+is_port_in_use() {
+    local port=$1
+    sudo lsof -t -i:$port > /dev/null 2>&1
+    return $?
+}
+
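+# Sends SIGTERM and waits up to 65s (the 60s graceful-shutdown window from
+# application.yml, plus a buffer) before falling back to SIGKILL.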
+kill_process_on_port() {
+    local port=$1
+    local pid=$(sudo lsof -t -i:$port 2>/dev/null)
+
+    if [ -z "$pid" ]; then
+        log "No process running on port $port"
+        return 0
+    fi
+
+    log "Sending graceful shutdown signal to process $pid on port $port"
+    sudo kill -15 "$pid"
+
+    local wait_count=0
+    while [ $wait_count -lt 65 ] && is_port_in_use "$port"; do
+        sleep 1
+        wait_count=$((wait_count + 1))
+    done
+
+    if is_port_in_use "$port"; then
+        log "Process didn't stop gracefully, forcing shutdown"
+        sudo kill -9 "$pid" 2>/dev/null || true
+        sleep 2
+    fi
+
+    log "Process on port $port stopped successfully"
+}
+
+health_check() {
+    local port=$1
+    local monitor_port=$2
+    local health_url="http://localhost:$monitor_port/monitoring/health"
+
+    log "Starting health check for port $port (monitor: $monitor_port)"
+
+    local retry=1
+    while [ $retry -le $MAX_HEALTH_CHECK_RETRIES ]; do
+        local status=$(curl -s -o /dev/null -w "%{http_code}" "$health_url" 2>/dev/null || echo "000")
+
+        log "Health check attempt $retry/$MAX_HEALTH_CHECK_RETRIES - Status: $status"
+
+        if [ "$status" = "200" ]; then
+            log "Health check passed!"
+            return 0
+        fi
+
+        sleep $HEALTH_CHECK_INTERVAL
+        retry=$((retry + 1))
+    done
+
+    log "Health check failed after $MAX_HEALTH_CHECK_RETRIES attempts"
+    return 1
+}
+
+start_application() {
+    local port=$1
+    local monitor_port=$2
+    local staging_jar="$APP_DIR/staging/app.jar"
+    local jar_file="$APP_DIR/app-$port.jar"
+
+    if [ ! -f "$staging_jar" ]; then
+        error_exit "No JAR file found in staging directory: $staging_jar"
+    fi
+
+    log "Copying JAR from staging to $jar_file"
+    cp "$staging_jar" "$jar_file"
+
+    log "Starting application on port $port with JAR: $jar_file"
+
+    if is_port_in_use "$port"; then
+        log "Port $port is in use, cleaning up..."
+        kill_process_on_port "$port"
+    fi
+
+    sudo nohup java \
+        -Dspring.profiles.active=$PROFILE,monitor \
+        -Duser.timezone=$TIMEZONE \
+        -Dserver.port=$port \
+        -Dmanagement.server.port=$monitor_port \
+        -Ddd.service=debate-timer \
+        -Ddd.env=$PROFILE \
+        -jar "$jar_file" > "$APP_DIR/app-$port.log" 2>&1 &
+
+    local pid=$!
+    log "Application started with PID: $pid"
+
+    sleep 3
+
+    # sudo: the JVM was started as root, so an unprivileged kill -0 would fail
+    if ! sudo kill -0 $pid 2>/dev/null; then
+        error_exit "Application process died immediately after start. Check logs at $APP_DIR/app-$port.log"
+    fi
+}
+
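+# Rewrites the upstream port in the nginx vhost, validates with nginx -t,
+# reloads, and probes through nginx; any failure restores the backup config.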
+switch_nginx_upstream() {
+    local new_port=$1
+    local nginx_conf="/etc/nginx/sites-available/api.dev.debate-timer.com"
+    local temp_conf="/tmp/api.dev.debate-timer.com.tmp"
+    local backup_conf="${nginx_conf}.bak"
+
+    if [ ! -f "$nginx_conf" ]; then
+        error_exit "nginx configuration not found at $nginx_conf"
+    fi
+
+    log "Switching nginx upstream to port $new_port"
+    sudo cp "$nginx_conf" "$backup_conf"
+
+    sed "s/server 127\.0\.0\.1:[0-9]\+;/server 127.0.0.1:$new_port;/" "$nginx_conf" > "$temp_conf"
+    sudo cp "$temp_conf" "$nginx_conf"
+
+    if ! sudo nginx -t 2>/dev/null; then
+        log "nginx configuration test failed, rolling back."
+        sudo cp "$backup_conf" "$nginx_conf"
+        sudo rm "$backup_conf"
+        return 1
+    fi
+
+    sudo nginx -s reload
+    log "nginx reloaded successfully"
+
+    sleep 2
+    local response=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost/" 2>/dev/null || echo "000")
+    if [ "$response" = "000" ] || [ "$response" = "502" ] || [ "$response" = "503" ]; then
+        log "nginx health check failed after reload (status: $response). Rolling back nginx config."
+        sudo cp "$backup_conf" "$nginx_conf"
+        sudo nginx -s reload
+        sudo rm "$backup_conf"
+        return 1
+    fi
+
+    log "nginx is now routing traffic to port $new_port"
+    sudo rm "$backup_conf"
+    return 0
+}
+
+main() {
+    local current_port=$(get_current_port)
+    local new_port=$(get_inactive_port "$current_port")
+    local new_monitor_port=$(get_monitor_port "$new_port")
+
+    log "Current active port: $current_port"
+    log "Deploying to port: $new_port"
+    log "Monitor port: $new_monitor_port"
+
+    log "Step 1/4: Starting new version on port $new_port"
+    start_application "$new_port" "$new_monitor_port"
+
+    log "Step 2/4: Performing health check"
+    if ! health_check "$new_port" "$new_monitor_port"; then
+        log "Deployment failed: Health check did not pass"
+        log "Rolling back: Stopping new version on port $new_port"
+        kill_process_on_port "$new_port"
+        error_exit "Deployment aborted due to health check failure"
+    fi
+
+    log "Step 3/4: Switching nginx to new version"
+    if ! switch_nginx_upstream "$new_port"; then
+        log "nginx switch failed, rolling back"
+        kill_process_on_port "$new_port"
+        error_exit "Deployment aborted due to nginx switch failure"
+    fi
+
+    log "Step 4/4: Stopping old version on port $current_port"
+    kill_process_on_port "$current_port"
+
+    local old_jar="$APP_DIR/app-$current_port.jar"
+    if [ -f "$old_jar" ]; then
+        log "Removing old JAR file: $old_jar"
+        rm -f "$old_jar"
+    fi
+
+    echo "$new_port" > "$PORT_FILE"
+    log "Updated active port file to $new_port"
+
+    log "Deployment completed successfully!"
+    log "Active port: $new_port"
+    log "Inactive port: $current_port"
+}
+
+main "$@"
diff --git a/scripts/nginx-switch-port.sh b/scripts/nginx-switch-port.sh
new file mode 100644
index 00000000..586f5c67
--- /dev/null
+++ b/scripts/nginx-switch-port.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+set -e
+
+NGINX_CONF="/etc/nginx/sites-available/api.dev.debate-timer.com"
+BACKUP_CONF="/etc/nginx/sites-available/api.dev.debate-timer.com.backup"
+TEMP_CONF="/tmp/api.dev.debate-timer.com.tmp"
+
+log() {
+    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
+    echo "${timestamp} $@"
+}
+
+if [ -z "$1" ]; then
+    log "Usage: $0 <port>"
+    log "Example: $0 8081"
+    exit 1
+fi
+
+NEW_PORT=$1
+
+if ! [[ "$NEW_PORT" =~ ^[0-9]+$ ]] || [ "$NEW_PORT" -lt 1 ] || [ "$NEW_PORT" -gt 65535 ]; then
+    log "Invalid port number: $NEW_PORT"
+    exit 1
+fi
+
+if [ ! -f "$NGINX_CONF" ]; then
+    log "nginx configuration not found at $NGINX_CONF"
+    exit 1
+fi
+
+log "Backing up current nginx configuration"
+sudo cp "$NGINX_CONF" "$BACKUP_CONF"
+
+log "Updating nginx upstream to port $NEW_PORT"
+sed "s/server 127\.0\.0\.1:[0-9]\+;/server 127.0.0.1:$NEW_PORT;/" "$NGINX_CONF" > "$TEMP_CONF"
+
+log "Configuration changes:"
+diff "$NGINX_CONF" "$TEMP_CONF" || true
+
+sudo cp "$TEMP_CONF" "$NGINX_CONF"
+
+log "Testing nginx configuration"
+if ! sudo nginx -t 2>&1; then
+    log "nginx configuration test failed!"
+    log "Rolling back to previous configuration"
+    sudo cp "$BACKUP_CONF" "$NGINX_CONF"
+    exit 1
+fi
+
+log "Reloading nginx"
+sudo nginx -s reload
+
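+# Probe through nginx: any status other than 000/502/503 means the proxy is
+# reaching a backend (the health endpoint itself lives on the monitor port).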
+sleep 2
+HEALTH_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost/" 2>/dev/null || echo "000")
+
+if [ "$HEALTH_STATUS" = "000" ] || [ "$HEALTH_STATUS" = "502" ] || [ "$HEALTH_STATUS" = "503" ]; then
+    log "Health check failed after nginx reload (status: $HEALTH_STATUS)"
+    log "nginx may not be routing to the correct backend"
+    exit 1
+else
+    log "nginx successfully switched to port $NEW_PORT"
+    log "Health check: OK (status $HEALTH_STATUS)"
+    rm -f "$TEMP_CONF"
+    exit 0
+fi
diff --git a/scripts/prod/zero-downtime-deploy.sh b/scripts/prod/zero-downtime-deploy.sh
new file mode 100644
index 00000000..c4b41432
--- /dev/null
+++ b/scripts/prod/zero-downtime-deploy.sh
@@ -0,0 +1,242 @@
+#!/bin/bash
+
+set -e
+
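+# Identical to scripts/dev/zero-downtime-deploy.sh apart from the prod profile
+# and the prod nginx vhost.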
+APP_DIR="/home/ubuntu/app"
+PORT_FILE="$APP_DIR/current_port.txt"
+LOG_FILE="$APP_DIR/deploy.log"
+BLUE_PORT=8080
+GREEN_PORT=8081
+BLUE_MONITOR_PORT=8083
+GREEN_MONITOR_PORT=8084
+MAX_HEALTH_CHECK_RETRIES=60
+HEALTH_CHECK_INTERVAL=2
+PROFILE="prod"
+TIMEZONE="Asia/Seoul"
+
+log() {
+    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
+    echo "${timestamp} $@" | tee -a "$LOG_FILE"
+}
+
+error_exit() {
+    log "$1"
+    exit 1
+}
+
+get_current_port() {
+    if [ ! -f "$PORT_FILE" ]; then
+        # Log to stderr so command substitution captures only the port number
+        log "Port file not found. Initializing with default port $BLUE_PORT" >&2
+        echo "$BLUE_PORT" > "$PORT_FILE"
+        echo "$BLUE_PORT"
+    else
+        cat "$PORT_FILE"
+    fi
+}
+
+get_inactive_port() {
+    local current_port=$1
+    if [ "$current_port" -eq "$BLUE_PORT" ]; then
+        echo "$GREEN_PORT"
+    else
+        echo "$BLUE_PORT"
+    fi
+}
+
+get_monitor_port() {
+    local app_port=$1
+    if [ "$app_port" -eq "$BLUE_PORT" ]; then
+        echo "$BLUE_MONITOR_PORT"
+    else
+        echo "$GREEN_MONITOR_PORT"
+    fi
+}
+
+is_port_in_use() {
+    local port=$1
+    sudo lsof -t -i:$port > /dev/null 2>&1
+    return $?
+}
+
+kill_process_on_port() {
+    local port=$1
+    local pid=$(sudo lsof -t -i:$port 2>/dev/null)
+
+    if [ -z "$pid" ]; then
+        log "No process running on port $port"
+        return 0
+    fi
+
+    log "Sending graceful shutdown signal to process $pid on port $port"
+    sudo kill -15 "$pid"
+
+    local wait_count=0
+    while [ $wait_count -lt 65 ] && is_port_in_use "$port"; do
+        sleep 1
+        wait_count=$((wait_count + 1))
+    done
+
+    if is_port_in_use "$port"; then
+        log "Process didn't stop gracefully, forcing shutdown"
+        sudo kill -9 "$pid" 2>/dev/null || true
+        sleep 2
+    fi
+
+    log "Process on port $port stopped successfully"
+}
+
+health_check() {
+    local port=$1
+    local monitor_port=$2
+    local health_url="http://localhost:$monitor_port/monitoring/health"
+
+    log "Starting health check for port $port (monitor: $monitor_port)"
+
+    local retry=1
+    while [ $retry -le $MAX_HEALTH_CHECK_RETRIES ]; do
+        local status=$(curl -s -o /dev/null -w "%{http_code}" "$health_url" 2>/dev/null || echo "000")
+
+        log "Health check attempt $retry/$MAX_HEALTH_CHECK_RETRIES - Status: $status"
+
+        if [ "$status" = "200" ]; then
+            log "Health check passed!"
+            return 0
+        fi
+
+        sleep $HEALTH_CHECK_INTERVAL
+        retry=$((retry + 1))
+    done
+
+    log "Health check failed after $MAX_HEALTH_CHECK_RETRIES attempts"
+    return 1
+}
+
+start_application() {
+    local port=$1
+    local monitor_port=$2
+    local staging_jar="$APP_DIR/staging/app.jar"
+    local jar_file="$APP_DIR/app-$port.jar"
+
+    if [ ! -f "$staging_jar" ]; then
+        error_exit "No JAR file found in staging directory: $staging_jar"
+    fi
+
+    log "Copying JAR from staging to $jar_file"
+    cp "$staging_jar" "$jar_file"
+
+    log "Starting application on port $port with JAR: $jar_file"
+
+    if is_port_in_use "$port"; then
+        log "Port $port is in use, cleaning up..."
+        kill_process_on_port "$port"
+    fi
+
+    sudo nohup java \
+        -Dspring.profiles.active=$PROFILE,monitor \
+        -Duser.timezone=$TIMEZONE \
+        -Dserver.port=$port \
+        -Dmanagement.server.port=$monitor_port \
+        -Ddd.service=debate-timer \
+        -Ddd.env=$PROFILE \
+        -jar "$jar_file" > "$APP_DIR/app-$port.log" 2>&1 &
+
+    local pid=$!
+    log "Application started with PID: $pid"
+
+    sleep 3
+
+    # sudo: the JVM was started as root, so an unprivileged kill -0 would fail
+    if ! sudo kill -0 $pid 2>/dev/null; then
+        error_exit "Application process died immediately after start. Check logs at $APP_DIR/app-$port.log"
+    fi
+}
+
+switch_nginx_upstream() {
+    local new_port=$1
+    local nginx_conf="/etc/nginx/sites-available/api.prod.debate-timer.com"
+    local temp_conf="/tmp/api.prod.debate-timer.com.tmp"
+    local backup_conf="${nginx_conf}.bak"
+
+    if [ ! -f "$nginx_conf" ]; then
+        error_exit "nginx configuration not found at $nginx_conf"
+    fi
+
+    log "Switching nginx upstream to port $new_port"
+    sudo cp "$nginx_conf" "$backup_conf"
+
+    sed "s/server 127\.0\.0\.1:[0-9]\+;/server 127.0.0.1:$new_port;/" "$nginx_conf" > "$temp_conf"
+    sudo cp "$temp_conf" "$nginx_conf"
+
+    if ! sudo nginx -t 2>/dev/null; then
+        log "nginx configuration test failed, rolling back."
+        sudo cp "$backup_conf" "$nginx_conf"
+        sudo rm "$backup_conf"
+        return 1
+    fi
+
+    sudo nginx -s reload
+    log "nginx reloaded successfully"
+
+    sleep 2
+    local response=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost/" 2>/dev/null || echo "000")
+    if [ "$response" = "000" ] || [ "$response" = "502" ] || [ "$response" = "503" ]; then
+        log "nginx health check failed after reload (status: $response). Rolling back nginx config."
+        sudo cp "$backup_conf" "$nginx_conf"
+        sudo nginx -s reload
+        sudo rm "$backup_conf"
+        return 1
+    fi
+
+    log "nginx is now routing traffic to port $new_port"
+    sudo rm "$backup_conf"
+    return 0
+}
+
+main() {
+    local current_port=$(get_current_port)
+    local new_port=$(get_inactive_port "$current_port")
+    local new_monitor_port=$(get_monitor_port "$new_port")
+
+    log "Current active port: $current_port"
+    log "Deploying to port: $new_port"
+    log "Monitor port: $new_monitor_port"
+
+    log "Step 1/4: Starting new version on port $new_port"
+    start_application "$new_port" "$new_monitor_port"
+
+    log "Step 2/4: Performing health check"
+    if ! health_check "$new_port" "$new_monitor_port"; then
+        log "Deployment failed: Health check did not pass"
+        log "Rolling back: Stopping new version on port $new_port"
+        kill_process_on_port "$new_port"
+        error_exit "Deployment aborted due to health check failure"
+    fi
+
+    log "Step 3/4: Switching nginx to new version"
+    if ! switch_nginx_upstream "$new_port"; then
+        log "nginx switch failed, rolling back"
+        kill_process_on_port "$new_port"
+        error_exit "Deployment aborted due to nginx switch failure"
+    fi
+
+    log "Step 4/4: Stopping old version on port $current_port"
+    kill_process_on_port "$current_port"
+
+    local old_jar="$APP_DIR/app-$current_port.jar"
+    if [ -f "$old_jar" ]; then
+        log "Removing old JAR file: $old_jar"
+        rm -f "$old_jar"
+    fi
+
+    echo "$new_port" > "$PORT_FILE"
+    log "Updated active port file to $new_port"
+
+    log "Deployment completed successfully!"
+    log "Active port: $new_port"
+    log "Inactive port: $current_port"
+}
+
+main "$@"
diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml
index 40e92645..8196d857 100644
--- a/src/main/resources/application.yml
+++ b/src/main/resources/application.yml
@@ -1,6 +1,11 @@
 spring:
   profiles:
     default: local
+  lifecycle:
+    timeout-per-shutdown-phase: 60s
+
+server:
+  shutdown: graceful
 
 springdoc:
   swagger-ui: