diff --git a/.github/workflows/cd-api-dev.yml b/.github/workflows/cd-api-dev.yml index 859fc65e..5188c2f3 100644 --- a/.github/workflows/cd-api-dev.yml +++ b/.github/workflows/cd-api-dev.yml @@ -98,6 +98,7 @@ jobs: GOOGLE_CLIENT_ID_DEV: ${{ secrets.GOOGLE_CLIENT_ID_DEV }} GOOGLE_CLIENT_SECRET_DEV: ${{ secrets.GOOGLE_CLIENT_SECRET_DEV }} WIDGET_SECRET_KEY_DEV: ${{ secrets.WIDGET_SECRET_KEY_DEV }} + SLACK_WEBHOOK_URL_DEV: ${{ secrets.SLACK_WEBHOOK_URL_DEV }} run: | export HOSTNAME=$(hostname) cd kokomen-backend/docker/dev diff --git a/.github/workflows/cd-api-prod.yml b/.github/workflows/cd-api-prod.yml index 2c2b6b24..fc274359 100644 --- a/.github/workflows/cd-api-prod.yml +++ b/.github/workflows/cd-api-prod.yml @@ -50,13 +50,7 @@ jobs: runs-on: [ self-hosted, prod, new ] steps: - - name: Stop existing container - run: sudo docker rm -f kokomen-interview-api-prod - - - name: Docker Image prune - run: sudo docker image prune -f -a - - - name: pull docker compose yaml files + - name: Pull docker compose and deployment files working-directory: /home/ubuntu run: | [ -d kokomen-backend ] || git clone --filter=blob:none --no-checkout https://github.com/samhap-soft/kokomen-backend.git @@ -70,8 +64,14 @@ jobs: - name: Docker Image pull run: sudo docker pull samhap/kokomen-interview-api:prod - - name: Docker run - working-directory: /home/ubuntu + - name: Docker Image prune + run: sudo docker image prune -f + + - name: Make deploy script executable + run: chmod +x /home/ubuntu/kokomen-backend/docker/prod/api/deploy.sh + + - name: Run Blue-Green Deployment + working-directory: /home/ubuntu/kokomen-backend/docker/prod/api env: SPRING_DATASOURCE_URL_PROD: ${{ secrets.SPRING_DATASOURCE_URL_PROD }} SPRING_DATASOURCE_USERNAME_PROD: ${{ secrets.SPRING_DATASOURCE_USERNAME_PROD }} @@ -85,7 +85,12 @@ jobs: GOOGLE_CLIENT_ID_PROD: ${{ secrets.GOOGLE_CLIENT_ID_PROD }} GOOGLE_CLIENT_SECRET_PROD: ${{ secrets.GOOGLE_CLIENT_SECRET_PROD }} WIDGET_SECRET_KEY_PROD: ${{ 
secrets.WIDGET_SECRET_KEY_PROD }}
+          SLACK_WEBHOOK_URL_PROD: ${{ secrets.SLACK_WEBHOOK_URL_PROD }}
         run: |
           export HOSTNAME=$(hostname)
-          cd kokomen-backend/docker/prod/api
-          sudo -E docker compose -f docker-compose-prod.yml up -d
+          sudo -E ./deploy.sh
+
+      - name: Verify deployment
+        run: |
+          sleep 5
+          curl -sf -H 'Host: api.kokomen.kr' http://localhost:80/actuator/health
diff --git a/build.gradle b/build.gradle
index ad60d037..50a6a581 100644
--- a/build.gradle
+++ b/build.gradle
@@ -54,6 +54,7 @@ dependencies {
     implementation 'org.springframework.boot:spring-boot-starter-actuator'
     implementation 'ch.qos.logback.contrib:logback-json-classic:0.1.5'
     implementation 'ch.qos.logback.contrib:logback-jackson:0.1.5'
+    implementation 'com.github.maricn:logback-slack-appender:1.6.1'
 
     // PDF 텍스트 추출
     implementation 'org.apache.pdfbox:pdfbox:3.0.3'
diff --git a/docker/dev/docker-compose-dev.yml b/docker/dev/docker-compose-dev.yml
index f3626479..7449dd46 100644
--- a/docker/dev/docker-compose-dev.yml
+++ b/docker/dev/docker-compose-dev.yml
@@ -24,6 +24,7 @@ services:
       GOOGLE_CLIENT_ID_DEV: ${GOOGLE_CLIENT_ID_DEV}
       GOOGLE_CLIENT_SECRET_DEV: ${GOOGLE_CLIENT_SECRET_DEV}
       WIDGET_SECRET_KEY_DEV: ${WIDGET_SECRET_KEY_DEV}
+      SLACK_WEBHOOK_URL_DEV: ${SLACK_WEBHOOK_URL_DEV}
     networks:
       - dev-kokomen-net
 
diff --git a/docker/prod/api/deploy.sh b/docker/prod/api/deploy.sh
new file mode 100755
index 00000000..91ad33e4
--- /dev/null
+++ b/docker/prod/api/deploy.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+# Blue-green deployment of the production API behind Traefik.
+#
+# Starts the idle colour (blue or green), waits for its Docker health check to
+# pass, then gracefully stops the previously active colour. The CD workflow
+# runs this script via `sudo -E ./deploy.sh`.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+COMPOSE_FILE="$SCRIPT_DIR/docker-compose-prod.yml"
+HEALTH_TIMEOUT=120
+HEALTH_INTERVAL=5
+GRACEFUL_SHUTDOWN_WAIT=65
+# Must match the Host(...) rule on the Traefik router labels in the compose
+# file; requests carrying any other Host header match no router.
+API_HOST="api.kokomen.kr"
+
+log_info() { echo "[INFO] $1"; }
+log_warn() { echo "[WARN] $1"; }
+log_error() { echo "[ERROR] $1"; }
+
+# Succeeds when a container named exactly $1 is running. Docker's name filter
+# is a regular expression, so it is anchored to rule out substring matches.
+is_running() {
+    [ -n "$(docker ps -q -f "name=^/?$1\$")" ]
+}
+
+# Prints the colour whose container is running: "blue", "green" or "none".
+# Blue wins when both are up.
+get_active() {
+    if is_running kokomen-api-blue; then
+        echo "blue"
+    elif is_running kokomen-api-green; then
+        echo "green"
+    else
+        echo "none"
+    fi
+}
+
+# Polls the Docker health status of container $1 every HEALTH_INTERVAL seconds.
+# Returns 0 once it is "healthy"; returns 1 when HEALTH_TIMEOUT elapses or the
+# container has stopped.
+wait_healthy() {
+    local container=$1
+    local elapsed=0
+    local health state
+
+    log_info "헬스체크 대기: $container (타임아웃: ${HEALTH_TIMEOUT}초)"
+
+    while [ "$elapsed" -lt "$HEALTH_TIMEOUT" ]; do
+        health=$(docker inspect --format='{{.State.Health.Status}}' "$container" 2>/dev/null || echo "starting")
+        if [ "$health" = "healthy" ]; then
+            echo ""
+            log_info "헬스체크 통과! (${elapsed}초 소요)"
+            return 0
+        fi
+
+        # A container that has stopped for good cannot become healthy, so give
+        # up now instead of waiting out the whole timeout.
+        state=$(docker inspect --format='{{.State.Status}}' "$container" 2>/dev/null || echo "unknown")
+        if [ "$state" = "exited" ] || [ "$state" = "dead" ]; then
+            echo ""
+            log_error "헬스체크 실패: 컨테이너 종료됨 ($state)"
+            return 1
+        fi
+
+        sleep "$HEALTH_INTERVAL"
+        elapsed=$((elapsed + HEALTH_INTERVAL))
+        echo -n "."
+    done
+
+    echo ""
+    log_error "헬스체크 실패: 타임아웃 (${HEALTH_TIMEOUT}초)"
+    return 1
+}
+
+# Entry point: picks the target colour, starts it, switches over and verifies.
+main() {
+    local current target old
+
+    log_info "========== Blue-Green 배포 시작 =========="
+
+    current=$(get_active)
+    log_info "현재 활성 환경: $current"
+
+    case "$current" in
+        blue)
+            target="green"
+            old="kokomen-api-blue"
+            ;;
+        green)
+            target="blue"
+            old="kokomen-api-green"
+            ;;
+        *)
+            target="blue"
+            old=""
+            log_info "최초 배포: blue 환경으로 시작"
+            ;;
+    esac
+
+    log_info "타겟 환경: $target"
+
+    # Step 0: make sure Traefik is running
+    if ! is_running traefik; then
+        log_info "Step 0: Traefik 시작"
+        sudo -E docker compose -f "$COMPOSE_FILE" up -d traefik
+        sleep 3
+    fi
+
+    # Step 1: start the new container
+    log_info "Step 1: $target 컨테이너 시작"
+    sudo -E docker compose -f "$COMPOSE_FILE" --profile "$target" up -d "kokomen-api-$target"
+
+    # Step 2: wait for its health check; remove it again on failure
+    log_info "Step 2: 헬스체크 수행"
+    if ! wait_healthy "kokomen-api-$target"; then
+        log_error "배포 실패: 새 컨테이너 헬스체크 실패"
+        log_warn "롤백: 새 컨테이너 제거"
+        docker rm -f "kokomen-api-$target" 2>/dev/null || true
+        exit 1
+    fi
+
+    # Step 3: give Traefik time to pick up the new backend
+    log_info "Step 3: Traefik 라우팅 안정화 대기"
+    sleep 5
+
+    # Step 4: gracefully stop the previous container
+    if [ -n "$old" ]; then
+        log_info "Step 4: 기존 컨테이너 종료 ($old, ${GRACEFUL_SHUTDOWN_WAIT}초 대기)"
+        docker stop -t "$GRACEFUL_SHUTDOWN_WAIT" "$old" || true
+        docker rm -f "$old" 2>/dev/null || true
+        log_info "기존 컨테이너 종료 완료"
+    fi
+
+    # Step 5: confirm the API answers through Traefik. A failure only logs a
+    # warning; by this point the previous container has already been removed.
+    # NOTE(review): the health checks in the compose file probe port 8081 while
+    # Traefik forwards to 8080 -- confirm /actuator/health is served there.
+    log_info "Step 5: 배포 완료 확인"
+    if curl -sf -H "Host: $API_HOST" "http://localhost:80/actuator/health" > /dev/null 2>&1; then
+        log_info "========== 배포 성공! =========="
+        log_info "활성 환경: $target"
+    else
+        log_warn "경고: 외부 헬스체크 실패 (Traefik 라우팅 확인 필요)"
+    fi
+}
+
+main "$@"
diff --git a/docker/prod/api/docker-compose-prod.yml b/docker/prod/api/docker-compose-prod.yml
index 699b89a5..81bfb9d9 100644
--- a/docker/prod/api/docker-compose-prod.yml
+++ b/docker/prod/api/docker-compose-prod.yml
@@ -1,11 +1,25 @@
 services:
-  kokomen-interview-api-prod:
+  traefik:
+    image: traefik:v3.0
+    container_name: traefik
+    restart: unless-stopped
+    ports:
+      - "80:80"
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - ./traefik/traefik.yml:/etc/traefik/traefik.yml:ro
+    environment:
+      TZ: Asia/Seoul
+    networks:
+      - api-net
+
+  kokomen-api-blue:
     image: samhap/kokomen-interview-api:prod
-    container_name: kokomen-interview-api-prod
+    container_name: kokomen-api-blue
     restart: on-failure:3
-    ports:
-      - "8080:8080"
-      - "8081:8081"
+    expose:
+      - "8080"
+      - "8081"
     volumes:
       - ./app/logs:/logs
     environment:
@@ -25,22 +39,73 @@ services:
       GOOGLE_CLIENT_ID_PROD: ${GOOGLE_CLIENT_ID_PROD}
       GOOGLE_CLIENT_SECRET_PROD: ${GOOGLE_CLIENT_SECRET_PROD}
       WIDGET_SECRET_KEY_PROD: ${WIDGET_SECRET_KEY_PROD}
+      SLACK_WEBHOOK_URL_PROD: ${SLACK_WEBHOOK_URL_PROD}
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8081/actuator/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 3
+      start_period: 40s
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.api.rule=Host(`api.kokomen.kr`)"
+      - "traefik.http.routers.api.entrypoints=web"
+      - "traefik.http.services.api.loadbalancer.server.port=8080"
+      - "traefik.http.services.api.loadbalancer.healthcheck.path=/actuator/health"
+      - "traefik.http.services.api.loadbalancer.healthcheck.port=8081"
+      - "traefik.http.services.api.loadbalancer.healthcheck.interval=5s"
+      - "traefik.http.services.api.loadbalancer.healthcheck.timeout=3s"
     networks:
       - api-net
+    profiles:
+      - blue
 
-  nginx:
-    image: nginx:1.28.0
-    container_name: nginx
-    ports:
-      - "80:80"
+  kokomen-api-green:
+    image:
samhap/kokomen-interview-api:prod
+    container_name: kokomen-api-green
+    restart: on-failure:3
+    expose:
+      - "8080"
+      - "8081"
     volumes:
-      - ./nginx/nginx.conf:/etc/nginx/nginx.conf
-      - ./nginx/logs:/var/log/nginx
-    restart: unless-stopped
+      - ./app/logs:/logs
     environment:
       TZ: Asia/Seoul
+      JAVA_TOOL_OPTIONS: -Duser.timezone=Asia/Seoul
+      HOSTNAME: ${HOSTNAME}
+      SPRING_PROFILES_ACTIVE: prod
+      SPRING_DATASOURCE_URL_PROD: ${SPRING_DATASOURCE_URL_PROD}
+      SPRING_DATASOURCE_USERNAME_PROD: ${SPRING_DATASOURCE_USERNAME_PROD}
+      SPRING_DATASOURCE_PASSWORD_PROD: ${SPRING_DATASOURCE_PASSWORD_PROD}
+      REDIS_PRIMARY_HOST_PROD: ${REDIS_PRIMARY_HOST_PROD}
+      OPEN_AI_API_KEY: ${OPEN_AI_API_KEY}
+      KAKAO_CLIENT_ID_PROD: ${KAKAO_CLIENT_ID_PROD}
+      KAKAO_CLIENT_SECRET_PROD: ${KAKAO_CLIENT_SECRET_PROD}
+      KAKAO_ADMIN_KEY_PROD: ${KAKAO_ADMIN_KEY_PROD}
+      SUPERTONE_API_TOKEN: ${SUPERTONE_API_TOKEN}
+      GOOGLE_CLIENT_ID_PROD: ${GOOGLE_CLIENT_ID_PROD}
+      GOOGLE_CLIENT_SECRET_PROD: ${GOOGLE_CLIENT_SECRET_PROD}
+      WIDGET_SECRET_KEY_PROD: ${WIDGET_SECRET_KEY_PROD}
+      SLACK_WEBHOOK_URL_PROD: ${SLACK_WEBHOOK_URL_PROD}
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8081/actuator/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 3
+      start_period: 40s
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.api.rule=Host(`api.kokomen.kr`)"
+      - "traefik.http.routers.api.entrypoints=web"
+      - "traefik.http.services.api.loadbalancer.server.port=8080"
+      - "traefik.http.services.api.loadbalancer.healthcheck.path=/actuator/health"
+      - "traefik.http.services.api.loadbalancer.healthcheck.port=8081"
+      - "traefik.http.services.api.loadbalancer.healthcheck.interval=5s"
+      - "traefik.http.services.api.loadbalancer.healthcheck.timeout=3s"
     networks:
       - api-net
+    profiles:
+      - green
 
   node:
     image: prom/node-exporter
diff --git a/docker/prod/api/rollback.sh b/docker/prod/api/rollback.sh
new file mode 100755
index 00000000..329fb85c
--- /dev/null
+++ b/docker/prod/api/rollback.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+# Rollback for the blue-green production deployment.
+#
+# Brings up the colour that is currently idle, waits for its Docker health
+# check, then gracefully stops the colour that is currently active.
+#
+# NOTE(review): deploy.sh removes the previous container after switching and
+# both colours use the same image tag, so when the idle container is missing
+# it is recreated from the local `:prod` image -- confirm that image is the
+# version you intend to roll back to.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+COMPOSE_FILE="$SCRIPT_DIR/docker-compose-prod.yml"
+HEALTH_TIMEOUT=120
+HEALTH_INTERVAL=5
+GRACEFUL_SHUTDOWN_WAIT=65
+
+log_info() { echo "[INFO] $1"; }
+log_warn() { echo "[WARN] $1"; }
+log_error() { echo "[ERROR] $1"; }
+
+# Prints the IDs of containers named exactly $1. Remaining arguments are passed
+# to `docker ps` (e.g. -a to include stopped containers). The name filter is a
+# regular expression, hence the anchors.
+find_container() {
+    local name=$1
+    shift
+    docker ps "$@" -q -f "name=^/?${name}\$"
+}
+
+# Prints the colour whose container is running: "blue", "green" or "none".
+get_active() {
+    if [ -n "$(find_container kokomen-api-blue)" ]; then
+        echo "blue"
+    elif [ -n "$(find_container kokomen-api-green)" ]; then
+        echo "green"
+    else
+        echo "none"
+    fi
+}
+
+# Succeeds when container $1 exists, whether running or stopped.
+check_container_exists() {
+    [ -n "$(find_container "$1" -a)" ]
+}
+
+# Polls the Docker health status of container $1 every HEALTH_INTERVAL seconds.
+# Returns 0 once "healthy"; 1 when HEALTH_TIMEOUT elapses or it has stopped.
+wait_healthy() {
+    local container=$1
+    local elapsed=0
+    local health state
+
+    log_info "헬스체크 대기: $container"
+
+    while [ "$elapsed" -lt "$HEALTH_TIMEOUT" ]; do
+        health=$(docker inspect --format='{{.State.Health.Status}}' "$container" 2>/dev/null || echo "starting")
+        if [ "$health" = "healthy" ]; then
+            echo ""
+            log_info "헬스체크 통과!"
+            return 0
+        fi
+
+        # A container that has stopped for good cannot become healthy, so give
+        # up now instead of waiting out the whole timeout.
+        state=$(docker inspect --format='{{.State.Status}}' "$container" 2>/dev/null || echo "unknown")
+        if [ "$state" = "exited" ] || [ "$state" = "dead" ]; then
+            echo ""
+            log_error "헬스체크 실패: 컨테이너 종료됨 ($state)"
+            return 1
+        fi
+
+        sleep "$HEALTH_INTERVAL"
+        elapsed=$((elapsed + HEALTH_INTERVAL))
+        echo -n "."
+    done
+
+    echo ""
+    log_error "헬스체크 실패"
+    return 1
+}
+
+# Entry point: starts the idle colour and retires the active one.
+main() {
+    local current rollback_target
+
+    log_info "========== 롤백 시작 =========="
+
+    current=$(get_active)
+    log_info "현재 활성 환경: $current"
+
+    case "$current" in
+        blue) rollback_target="green" ;;
+        green) rollback_target="blue" ;;
+        *)
+            log_error "현재 활성 환경을 확인할 수 없습니다"
+            exit 1
+            ;;
+    esac
+
+    log_info "롤백 대상: $rollback_target"
+
+    if check_container_exists "kokomen-api-$rollback_target"; then
+        # Reuse the existing container. Abort if it cannot be started rather
+        # than discovering that through a health-check timeout.
+        log_info "기존 컨테이너 시작"
+        if ! docker start "kokomen-api-$rollback_target"; then
+            log_error "롤백 실패: 컨테이너 시작 실패"
+            exit 1
+        fi
+    else
+        # No container left to restart: create one from the compose file.
+        log_info "롤백 대상 컨테이너 생성"
+        sudo -E docker compose -f "$COMPOSE_FILE" --profile "$rollback_target" up -d "kokomen-api-$rollback_target"
+    fi
+
+    if ! wait_healthy "kokomen-api-$rollback_target"; then
+        log_error "롤백 실패: 헬스체크 실패"
+        exit 1
+    fi
+
+    # Give Traefik time to pick up the restored backend.
+    sleep 5
+
+    log_info "현재 컨테이너 종료: kokomen-api-$current"
+    docker stop -t "$GRACEFUL_SHUTDOWN_WAIT" "kokomen-api-$current" || true
+    docker rm -f "kokomen-api-$current" 2>/dev/null || true
+
+    log_info "========== 롤백 완료 =========="
+    log_info "활성 환경: $rollback_target"
+}
+
+main "$@"
diff --git a/docker/prod/api/traefik/traefik.yml b/docker/prod/api/traefik/traefik.yml
new file mode 100644
index 00000000..413eb693
--- /dev/null
+++ b/docker/prod/api/traefik/traefik.yml
@@ -0,0 +1,25 @@
+api:
+  dashboard: false
+
+entryPoints:
+  web:
+    address: ":80"
+
+providers:
+  docker:
+    endpoint: "unix:///var/run/docker.sock"
+    exposedByDefault: false
+    network: api-net
+    watch: true
+
+accessLog:
+  format: json
+  fields:
+    headers:
+      names:
+        X-Real-IP: keep
+        X-Request-ID: keep
+
+log:
+  level: INFO
+  format: json
diff --git a/src/main/java/com/samhap/kokomen/global/logging/RateLimitingFilter.java b/src/main/java/com/samhap/kokomen/global/logging/RateLimitingFilter.java
new file mode 100644
index 00000000..6a4d791e
--- /dev/null
+++ b/src/main/java/com/samhap/kokomen/global/logging/RateLimitingFilter.java
@@ -0,0 +1,73 @@
+package com.samhap.kokomen.global.logging;
+
+import ch.qos.logback.classic.Level;
+import ch.qos.logback.classic.spi.ILoggingEvent;
+import ch.qos.logback.core.filter.Filter;
+import ch.qos.logback.core.spi.FilterReply;
+
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * Rate-limiting filter dedicated to Slack alerts.
+ * Suppresses duplicate alerts for the same error message during a cooldown period.
+ * Runs as an appender-level filter, so FILE logging is not affected.
+ */
+public class RateLimitingFilter extends Filter<ILoggingEvent> {
+    // Soft cap on tracked keys; going past it triggers cleanupOldEntries().
+    private static final int MAX_CACHE_SIZE = 1000;
+    // Error key -> epoch millis of the last event that was let through.
+    private final ConcurrentHashMap<String, Long> lastLogTimes = new ConcurrentHashMap<>();
+    private long cooldownMillis = 60000; // default: 60 seconds
+    /** Sets the per-key cooldown in seconds. */
+    public void setCooldownSeconds(int seconds) {
+        this.cooldownMillis = seconds * 1000L;
+    }
+    /** Passes non-ERROR events and the first ERROR per key per cooldown (NEUTRAL); denies the rest. */
+    @Override
+    public FilterReply decide(ILoggingEvent event) {
+        if (event.getLevel() != Level.ERROR) {
+            return FilterReply.NEUTRAL;
+        }
+
+        String errorKey = buildErrorKey(event);
+        long now = System.currentTimeMillis();
+        // compute() is atomic per key, so two concurrent events cannot both pass.
+        AtomicBoolean allowed = new AtomicBoolean(false);
+        lastLogTimes.compute(errorKey, (key, lastTime) -> {
+            if (lastTime == null || (now - lastTime) >= cooldownMillis) {
+                allowed.set(true);
+                return now;
+            }
+            return lastTime;
+        });
+        // Prune only once the map has outgrown the cap.
+        if (lastLogTimes.size() > MAX_CACHE_SIZE) {
+            cleanupOldEntries(now);
+        }
+
+        return allowed.get() ? FilterReply.NEUTRAL : FilterReply.DENY;
+    }
+    /** Builds the dedup key: logger name, hash of getMessage(), and the throwable class name if any. */
+    private String buildErrorKey(ILoggingEvent event) {
+        StringBuilder key = new StringBuilder();
+        key.append(event.getLoggerName());
+
+        String message = event.getMessage();
+        if (message != null) {
+            key.append(":").append(message.hashCode());
+        }
+
+        if (event.getThrowableProxy() != null) {
+            key.append(":").append(event.getThrowableProxy().getClassName());
+        }
+
+        return key.toString();
+    }
+    /** Drops entries whose last pass-through is older than ten cooldown periods. */
+    private void cleanupOldEntries(long now) {
+        lastLogTimes.entrySet().removeIf(
+                entry -> (now - entry.getValue()) > cooldownMillis * 10
+        );
+    }
+}
diff --git a/src/main/java/com/samhap/kokomen/global/logging/SlackErrorLayout.java b/src/main/java/com/samhap/kokomen/global/logging/SlackErrorLayout.java
new file mode 100644
index 00000000..c477f206
--- /dev/null
+++ b/src/main/java/com/samhap/kokomen/global/logging/SlackErrorLayout.java
@@ -0,0 +1,61 @@
+package com.samhap.kokomen.global.logging;
+
+import ch.qos.logback.classic.spi.ILoggingEvent;
+import ch.qos.logback.classic.spi.IThrowableProxy;
+import ch.qos.logback.classic.spi.ThrowableProxyUtil;
+import ch.qos.logback.core.LayoutBase;
+
+import java.time.Instant;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+/** Formats a logging event as Slack message text: header, request ID, time, logger, message, stack trace. */
+public class SlackErrorLayout extends LayoutBase<ILoggingEvent> {
+    // Stack traces longer than this many characters are cut off.
+    private static final int MAX_STACK_TRACE_LENGTH = 2000;
+    private static final DateTimeFormatter DATE_FORMAT =
+            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss").withZone(ZoneId.systemDefault());
+
+    private String environment = "UNKNOWN";
+    /** Sets the environment label that is shown upper-cased in the message header. */
+    public void setEnvironment(String environment) {
+        this.environment = environment;
+    }
+    /** Renders the event; the request ID line and the stack trace section are omitted when absent. */
+    @Override
+    public String doLayout(ILoggingEvent event) {
+        StringBuilder sb = new StringBuilder();
+
+        sb.append(":rotating_light: *[").append(environment.toUpperCase()).append("]* 서버 에러 발생\n\n");
+
+        String requestId = event.getMDCPropertyMap().get("requestId");
+        if (requestId != null && !requestId.isEmpty()) {
+            sb.append(":label: *Request ID:* `").append(requestId).append("`\n");
+        }
+
+        sb.append(":clock3: *Time:* ").append(DATE_FORMAT.format(Instant.ofEpochMilli(event.getTimeStamp()))).append("\n");
+        sb.append(":page_facing_up: *Logger:* `").append(getShortLoggerName(event.getLoggerName())).append("`\n\n");
+
+        sb.append(":speech_balloon: *Message:*\n```").append(event.getFormattedMessage()).append("```\n");
+
+        IThrowableProxy throwableProxy = event.getThrowableProxy();
+        if (throwableProxy != null) {
+            sb.append("\n:bug: *Stack Trace:*\n```");
+            String stackTrace = ThrowableProxyUtil.asString(throwableProxy);
+            if (stackTrace.length() > MAX_STACK_TRACE_LENGTH) {
+                stackTrace = stackTrace.substring(0, MAX_STACK_TRACE_LENGTH) + "\n... (truncated)";
+            }
+            sb.append(stackTrace);
+            sb.append("```");
+        }
+
+        return sb.toString();
+    }
+    /** Returns the part of the logger name after the last dot, or "" for null. */
+    private String getShortLoggerName(String loggerName) {
+        if (loggerName == null) {
+            return "";
+        }
+        int lastDot = loggerName.lastIndexOf('.');
+        return lastDot > 0 ?
loggerName.substring(lastDot + 1) : loggerName; + } +} diff --git a/src/main/resources/application-dev.yml b/src/main/resources/application-dev.yml index 2a2a7f74..31772b1c 100644 --- a/src/main/resources/application-dev.yml +++ b/src/main/resources/application-dev.yml @@ -28,3 +28,7 @@ retry: initial-interval: 500 multiplier: 2.0 max-interval: 2000 +slack: + webhook-url: ${SLACK_WEBHOOK_URL_DEV} + channel: "#kokomen-dev-alerts" + environment: DEV diff --git a/src/main/resources/application-prod.yml b/src/main/resources/application-prod.yml index cb1ef7fe..5f0ba1c8 100644 --- a/src/main/resources/application-prod.yml +++ b/src/main/resources/application-prod.yml @@ -28,3 +28,7 @@ retry: initial-interval: 500 multiplier: 2.0 max-interval: 2000 +slack: + webhook-url: ${SLACK_WEBHOOK_URL_PROD} + channel: "#kokomen-prod-alerts" + environment: PROD diff --git a/src/main/resources/logback-spring.xml b/src/main/resources/logback-spring.xml index f7469354..a4c49c11 100644 --- a/src/main/resources/logback-spring.xml +++ b/src/main/resources/logback-spring.xml @@ -2,6 +2,9 @@ + + + @@ -26,6 +29,34 @@ + + + ${slackWebhookUrl} + ${slackChannel} + Kokomen-Alert-Bot + :rotating_light: + true + + ${slackEnvironment} + + + + + + + + ERROR + + + + 60 + + 500 + 0 + true + true + + @@ -33,17 +64,19 @@ - + + - + +