diff --git a/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/dto/request/ResetRequest.java b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/dto/request/ResetRequest.java new file mode 100644 index 00000000..8f600c66 --- /dev/null +++ b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/dto/request/ResetRequest.java @@ -0,0 +1,12 @@ +package com.mzc.secondproject.serverless.domain.speaking.dto.request; + +/** + * 대화 초기화 요청 DTO + */ +public record ResetRequest( + String sessionId +) { + public boolean isValid() { + return sessionId != null && !sessionId.isEmpty(); + } +} \ No newline at end of file diff --git a/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/dto/request/SpeakingRequest.java b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/dto/request/SpeakingRequest.java new file mode 100644 index 00000000..58ad78c1 --- /dev/null +++ b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/dto/request/SpeakingRequest.java @@ -0,0 +1,32 @@ +package com.mzc.secondproject.serverless.domain.speaking.dto.request; + +/** + * Speaking API 요청 DTO + */ +public record SpeakingRequest( + String sessionId, // 세션 ID (첫 요청 시 null) + String audio, // 음성 데이터 (base64) + String text, // 텍스트 입력 + String level // 레벨 (BEGINNER, INTERMEDIATE, ADVANCED) +) { + /** + * 기본값 적용된 레벨 반환 + */ + public String getLevelOrDefault() { + return level != null && !level.isEmpty() ? 
level : "INTERMEDIATE"; + } + + /** + * 음성 입력인지 확인 + */ + public boolean hasAudio() { + return audio != null && !audio.isEmpty(); + } + + /** + * 텍스트 입력인지 확인 + */ + public boolean hasText() { + return text != null && !text.trim().isEmpty(); + } +} \ No newline at end of file diff --git a/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/dto/response/SpeakingResponse.java b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/dto/response/SpeakingResponse.java new file mode 100644 index 00000000..49d714dd --- /dev/null +++ b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/dto/response/SpeakingResponse.java @@ -0,0 +1,12 @@ +package com.mzc.secondproject.serverless.domain.speaking.dto.response; + +/** + * Speaking API 응답 DTO + */ +public record SpeakingResponse( + String sessionId, // 세션 ID (다음 요청에 사용) + String userTranscript, // 사용자가 말한 내용 (STT 결과) + String aiText, // AI 응답 텍스트 + String aiAudioUrl, // AI 응답 음성 URL (Polly) + double confidence // STT 신뢰도 +) {} \ No newline at end of file diff --git a/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/handler/websocket/SpeakingHandler.java b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/handler/websocket/SpeakingHandler.java new file mode 100644 index 00000000..69375925 --- /dev/null +++ b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/handler/websocket/SpeakingHandler.java @@ -0,0 +1,157 @@ +package com.mzc.secondproject.serverless.domain.speaking.handler.websocket; + +import com.amazonaws.services.lambda.runtime.Context; +import com.amazonaws.services.lambda.runtime.RequestHandler; +import com.amazonaws.services.lambda.runtime.events.APIGatewayProxyRequestEvent; +import com.amazonaws.services.lambda.runtime.events.APIGatewayProxyResponseEvent; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import 
com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import com.mzc.secondproject.serverless.common.util.JwtUtil; +import com.mzc.secondproject.serverless.domain.speaking.service.SpeakingService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; +import java.util.Optional; + +/** + * Speaking API 핸들러 + * + * POST /api/speaking/chat - 대화 (음성 또는 텍스트) + * POST /api/speaking/reset - 대화 초기화 + */ +public class SpeakingHandler implements RequestHandler { + + private static final Logger logger = LoggerFactory.getLogger(SpeakingHandler.class); + private static final Gson gson = new GsonBuilder().create(); + + private static final Map CORS_HEADERS = Map.of( + "Content-Type", "application/json", + "Access-Control-Allow-Origin", "*", + "Access-Control-Allow-Headers", "Content-Type,Authorization", + "Access-Control-Allow-Methods", "POST,OPTIONS" + ); + + private final SpeakingService speakingService; + + public SpeakingHandler() { + this.speakingService = new SpeakingService(); + } + + @Override + public APIGatewayProxyResponseEvent handleRequest(APIGatewayProxyRequestEvent event, Context context) { + logger.info("Speaking API request received"); + + // OPTIONS 요청 처리 (CORS preflight) + if ("OPTIONS".equalsIgnoreCase(event.getHttpMethod())) { + return response(200, Map.of("message", "OK")); + } + + try { + // JWT 토큰 검증 + String authHeader = event.getHeaders().get("Authorization"); + if (authHeader == null || !authHeader.startsWith("Bearer ")) { + return response(401, Map.of("error", "Authorization header is required")); + } + + String token = authHeader.substring(7); + if (!JwtUtil.isValid(token)) { + return response(401, Map.of("error", "Invalid or expired token")); + } + + Optional userIdOpt = JwtUtil.extractUserId(token); + if (userIdOpt.isEmpty()) { + return response(401, Map.of("error", "Invalid token")); + } + + String userId = userIdOpt.get(); + String path = event.getPath(); + String body = event.getBody(); + + 
logger.info("Processing request: path={}, userId={}", path, userId); + + // Routing + if (path.endsWith("/chat")) { + return handleChat(userId, body); + } else if (path.endsWith("/reset")) { + return handleReset(userId, body); + } else { + return response(404, Map.of("error", "Not found")); + } + + } catch (Exception e) { + logger.error("Error processing request: {}", e.getMessage(), e); + return response(500, Map.of("error", "Internal server error: " + e.getMessage())); + } + } + + /** + * Handles a chat turn (voice or text). A field sent as an explicit JSON null is treated as absent. + */ + private APIGatewayProxyResponseEvent handleChat(String userId, String body) { + if (body == null || body.isEmpty()) { + return response(400, Map.of("error", "Request body is required")); + } + + JsonObject request = JsonParser.parseString(body).getAsJsonObject(); + + String sessionId = request.has("sessionId") && !request.get("sessionId").isJsonNull() ? request.get("sessionId").getAsString() : null; + String level = request.has("level") && !request.get("level").isJsonNull() ? request.get("level").getAsString() : "INTERMEDIATE"; + String audio = request.has("audio") && !request.get("audio").isJsonNull() ? request.get("audio").getAsString() : null; + String text = request.has("text") && !request.get("text").isJsonNull() ? 
request.get("text").getAsString() : null; + + SpeakingService.SpeakingResponse result; + + if (audio != null && !audio.isEmpty()) { + // Handle voice input + logger.info("Processing voice input"); + result = speakingService.processVoiceInput(sessionId, userId, audio, level); + } else if (text != null && !text.trim().isEmpty()) { + // Handle text input + logger.info("Processing text input: {}", text); + result = speakingService.processTextInput(sessionId, userId, text.trim(), level); + } else { + return response(400, Map.of("error", "Either 'audio' or 'text' is required")); + } + + return response(200, Map.of( + "sessionId", result.sessionId(), + "userTranscript", result.userTranscript(), + "aiText", result.aiText(), + "aiAudioUrl", result.aiAudioUrl(), + "confidence", result.confidence() + )); + } + + /** + * Resets a conversation. A sessionId sent as an explicit JSON null is handled like a missing one (400). + */ + private APIGatewayProxyResponseEvent handleReset(String userId, String body) { + if (body == null || body.isEmpty()) { + return response(400, Map.of("error", "Request body is required")); + } + + JsonObject request = JsonParser.parseString(body).getAsJsonObject(); + String sessionId = request.has("sessionId") && !request.get("sessionId").isJsonNull() ? 
request.get("sessionId").getAsString() : null; + + if (sessionId == null || sessionId.isEmpty()) { + return response(400, Map.of("error", "sessionId is required")); + } + + speakingService.resetConversation(sessionId); + + return response(200, Map.of( + "message", "Conversation reset successfully", + "sessionId", sessionId + )); + } + + private APIGatewayProxyResponseEvent response(int statusCode, Map body) { + return new APIGatewayProxyResponseEvent() + .withStatusCode(statusCode) + .withHeaders(CORS_HEADERS) + .withBody(gson.toJson(body)); + } +} diff --git a/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/model/SpeakingSession.java b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/model/SpeakingSession.java new file mode 100644 index 00000000..07956b2f --- /dev/null +++ b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/model/SpeakingSession.java @@ -0,0 +1,96 @@ +package com.mzc.secondproject.serverless.domain.speaking.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import software.amazon.awssdk.enhanced.dynamodb.mapper.annotations.*; + +/** + * Speaking WebSocket 연결 정보 + * connectionId ↔ userId 매핑 + 대화 히스토리 저장 + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +@DynamoDbBean +public class SpeakingSession { + + // DynamoDB Key Prefixes + public static final String PK_PREFIX = "SPEAKING_SESSION#"; + public static final String SK_METADATA = "METADATA"; + public static final String GSI1PK_PREFIX = "SPEAKING_USER#"; + public static final String GSI1SK_PREFIX = "SESSION#"; + + private String pk; // SPEAKING_SESSION#{sessionId} + private String sk; // METADATA + private String gsi1pk; // SPEAKING_USER#{userId} + private String gsi1sk; // SESSION#{sessionId} + + private String sessionId; + private String userId; + private String createdAt; + private String updatedAt; + private Long 
ttl; // 자동 삭제용 (24시간) + + // Speaking 전용 필드 + private String conversationHistory; // 대화 히스토리 (JSON) + private String targetLevel; // 목표 레벨 (BEGINNER, INTERMEDIATE, ADVANCED) + + /** + * 세션 생성 팩토리 메서드 + */ + public static SpeakingSession create(String sessionId, String userId, String level) { + String now = java.time.Instant.now().toString(); + // 24시간 후 자동 삭제 + long ttl = java.time.Instant.now().plusSeconds(86400).getEpochSecond(); + + return SpeakingSession.builder() + .pk(PK_PREFIX + sessionId) + .sk(SK_METADATA) + .gsi1pk(GSI1PK_PREFIX + userId) + .gsi1sk(GSI1SK_PREFIX + sessionId) + .sessionId(sessionId) + .userId(userId) + .createdAt(now) + .updatedAt(now) + .ttl(ttl) + .conversationHistory("[]") + .targetLevel(level != null ? level.toUpperCase() : "INTERMEDIATE") + .build(); + } + + /** + * 업데이트 시간 갱신 + */ + public void touch() { + this.updatedAt = java.time.Instant.now().toString(); + // TTL 연장 (24시간) + this.ttl = java.time.Instant.now().plusSeconds(86400).getEpochSecond(); + } + + @DynamoDbPartitionKey + @DynamoDbAttribute("PK") + public String getPk() { + return pk; + } + + @DynamoDbSortKey + @DynamoDbAttribute("SK") + public String getSk() { + return sk; + } + + @DynamoDbSecondaryPartitionKey(indexNames = "GSI1") + @DynamoDbAttribute("GSI1PK") + public String getGsi1pk() { + return gsi1pk; + } + + @DynamoDbSecondarySortKey(indexNames = "GSI1") + @DynamoDbAttribute("GSI1SK") + public String getGsi1sk() { + return gsi1sk; + } +} \ No newline at end of file diff --git a/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/repository/SpeakingSessionRepository.java b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/repository/SpeakingSessionRepository.java new file mode 100644 index 00000000..fed1cd66 --- /dev/null +++ b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/repository/SpeakingSessionRepository.java @@ -0,0 +1,74 @@ +package 
com.mzc.secondproject.serverless.domain.speaking.repository; + +import com.mzc.secondproject.serverless.common.config.AwsClients; +import com.mzc.secondproject.serverless.common.config.EnvConfig; +import com.mzc.secondproject.serverless.domain.speaking.model.SpeakingSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.enhanced.dynamodb.DynamoDbTable; +import software.amazon.awssdk.enhanced.dynamodb.Key; +import software.amazon.awssdk.enhanced.dynamodb.TableSchema; + +import java.util.Optional; + +/** + * Speaking WebSocket 연결 정보 Repository + */ +public class SpeakingSessionRepository { + + private static final Logger logger = LoggerFactory.getLogger(SpeakingSessionRepository.class); + private static final String TABLE_NAME = EnvConfig.getRequired("CHAT_TABLE_NAME"); + + private final DynamoDbTable table; + + public SpeakingSessionRepository() { + this.table = AwsClients.dynamoDbEnhanced().table( + TABLE_NAME, + TableSchema.fromBean(SpeakingSession.class) + ); + } + + /** + * 연결 정보 저장 + */ + public void save(SpeakingSession session) { + table.putItem(session); + logger.debug("Speaking session saved: sessionId={}, userId={}", + session.getSessionId(), session.getUserId()); + } + + /** + * sessionId로 연결 정보 조회 + */ + public Optional findBySessionId(String sessionId) { + Key key = Key.builder() + .partitionValue(SpeakingSession.PK_PREFIX + sessionId) + .sortValue(SpeakingSession.SK_METADATA) + .build(); + + SpeakingSession session = table.getItem(key); + return Optional.ofNullable(session); + } + + /** + * 연결 정보 업데이트 (대화 히스토리 등) + */ + public void update(SpeakingSession session) { + session.touch(); // 업데이트 시간 및 TTL 갱신 + table.putItem(session); + logger.debug("Speaking session updated: sessionId={}", session.getSessionId()); + } + + /** + * 연결 정보 삭제 + */ + public void delete(String sessionId) { + Key key = Key.builder() + .partitionValue(SpeakingSession.PK_PREFIX + sessionId) + .sortValue(SpeakingSession.SK_METADATA) + 
.build(); + + table.deleteItem(key); + logger.info("Speaking session deleted: sessionId={}", sessionId); + } +} \ No newline at end of file diff --git a/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/service/SpeakingService.java b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/service/SpeakingService.java index 7c428ddc..3dffac92 100644 --- a/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/service/SpeakingService.java +++ b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/speaking/service/SpeakingService.java @@ -5,236 +5,264 @@ import com.mzc.secondproject.serverless.common.config.EnvConfig; import com.mzc.secondproject.serverless.common.service.PollyService; import com.mzc.secondproject.serverless.domain.opic.service.TranscribeProxyService; -import com.mzc.secondproject.serverless.domain.speaking.model.SpeakingConnection; -import com.mzc.secondproject.serverless.domain.speaking.repository.SpeakingConnectionRepository; +import com.mzc.secondproject.serverless.domain.speaking.model.SpeakingSession; +import com.mzc.secondproject.serverless.domain.speaking.repository.SpeakingSessionRepository; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.core.SdkBytes; import software.amazon.awssdk.services.bedrockruntime.model.InvokeModelRequest; import software.amazon.awssdk.services.bedrockruntime.model.InvokeModelResponse; + import java.util.ArrayList; import java.util.List; +import java.util.UUID; /** * AI와 대화하기 서비스 * 음성 입력 → STT → Bedrock → TTS → 음성 출력 */ public class SpeakingService { - - private static final Logger logger = LoggerFactory.getLogger(SpeakingService.class); - private static final Gson gson = new GsonBuilder().create(); - - private static final String MODEL_ID = "anthropic.claude-3-haiku-20240307-v1:0"; - private static final int MAX_TOKENS = 500; - private static final int MAX_HISTORY_SIZE = 
10; // 최근 10턴만 유지 - - private final TranscribeProxyService transcribeService; - private final PollyService pollyService; - private final SpeakingConnectionRepository connectionRepository; - - public SpeakingService() { - this.transcribeService = new TranscribeProxyService(); - this.pollyService = new PollyService( - EnvConfig.getRequired("BUCKET_NAME"), - "speaking/voice/" - ); - this.connectionRepository = new SpeakingConnectionRepository(); - } - - /** - * 음성 입력 처리 (전체 플로우) - */ - public SpeakingResponse processVoiceInput(String connectionId, String audioBase64) { - logger.info("Processing voice input for connectionId: {}", connectionId); - - // 연결 정보 조회 - SpeakingConnection connection = connectionRepository.findByConnectionId(connectionId) - .orElseThrow(() -> new RuntimeException("Connection not found: " + connectionId)); - - String targetLevel = connection.getTargetLevel(); - - // STT: 음성 → 텍스트 (Transcribe Proxy 사용) - logger.info("Step 1: Transcribing audio..."); - TranscribeProxyService.TranscribeResult sttResult = transcribeService.transcribe( - audioBase64, - connectionId, - "en-US" - ); - String userText = sttResult.transcript(); - logger.info("Transcription complete: {} (confidence: {})", userText, sttResult.confidence()); - - // 대화 히스토리 로드 - List history = parseHistory(connection.getConversationHistory()); - - // Bedrock: AI 응답 생성 - logger.info("Step 2: Generating AI response..."); - String aiResponse = generateAiResponse(userText, history, targetLevel); - logger.info("AI response generated: {}", aiResponse); - - // 히스토리 업데이트 (최근 N턴만 유지) - history.add(new Message("user", userText)); - history.add(new Message("assistant", aiResponse)); - if (history.size() > MAX_HISTORY_SIZE * 2) { - history = new ArrayList<>(history.subList(history.size() - MAX_HISTORY_SIZE * 2, history.size())); - } - connection.setConversationHistory(toJson(history)); - connectionRepository.update(connection); - - // TTS: 텍스트 → 음성 (Polly 사용) - logger.info("Step 3: Synthesizing 
speech..."); - String audioId = connectionId + "_" + System.currentTimeMillis(); - PollyService.VoiceSynthesisResult ttsResult = pollyService.synthesizeSpeech( - audioId, - aiResponse, - "FEMALE" - ); - logger.info("Speech synthesis complete: cached={}", ttsResult.isCached()); - - return new SpeakingResponse( - userText, - aiResponse, - ttsResult.getAudioUrl(), - sttResult.confidence() - ); - } - - /** - * 텍스트 입력 처리 (음성 없이 텍스트만) - */ - public SpeakingResponse processTextInput(String connectionId, String userText) { - logger.info("Processing text input for connectionId: {}", connectionId); - - // 연결 정보 조회 - SpeakingConnection connection = connectionRepository.findByConnectionId(connectionId) - .orElseThrow(() -> new RuntimeException("Connection not found: " + connectionId)); - - String targetLevel = connection.getTargetLevel(); - - // 대화 히스토리 로드 - List history = parseHistory(connection.getConversationHistory()); - - // AI 응답 생성 - String aiResponse = generateAiResponse(userText, history, targetLevel); - - // 히스토리 업데이트 - history.add(new Message("user", userText)); - history.add(new Message("assistant", aiResponse)); - if (history.size() > MAX_HISTORY_SIZE * 2) { - history = new ArrayList<>(history.subList(history.size() - MAX_HISTORY_SIZE * 2, history.size())); - } - connection.setConversationHistory(toJson(history)); - connectionRepository.update(connection); - - // TTS 생성 - String audioId = connectionId + "_" + System.currentTimeMillis(); - PollyService.VoiceSynthesisResult ttsResult = pollyService.synthesizeSpeech( - audioId, aiResponse, "FEMALE" - ); - - return new SpeakingResponse(userText, aiResponse, ttsResult.getAudioUrl(), 1.0); - } - - /** - * 레벨 변경 - */ - public void updateLevel(String connectionId, String level) { - SpeakingConnection connection = connectionRepository.findByConnectionId(connectionId) - .orElseThrow(() -> new RuntimeException("Connection not found: " + connectionId)); - - connection.setTargetLevel(level.toUpperCase()); - 
connectionRepository.update(connection); - logger.info("Level updated for connectionId {}: {}", connectionId, level); - } - - /** - * 대화 히스토리 초기화 - */ - public void resetConversation(String connectionId) { - SpeakingConnection connection = connectionRepository.findByConnectionId(connectionId) - .orElseThrow(() -> new RuntimeException("Connection not found: " + connectionId)); - - connection.setConversationHistory("[]"); - connectionRepository.update(connection); - logger.info("Conversation reset for connectionId: {}", connectionId); - } - - - /** - * Bedrock Claude 호출하여 AI 응답 생성 - */ - private String generateAiResponse(String userText, List history, String targetLevel) { - String systemPrompt = buildSystemPrompt(targetLevel); - - JsonObject requestBody = new JsonObject(); - requestBody.addProperty("anthropic_version", "bedrock-2023-05-31"); - requestBody.addProperty("max_tokens", MAX_TOKENS); - requestBody.addProperty("system", systemPrompt); - - // 메시지 배열 구성 - JsonArray messages = new JsonArray(); - - // 기존 히스토리 추가 - for (Message msg : history) { - JsonObject m = new JsonObject(); - m.addProperty("role", msg.role()); - m.addProperty("content", msg.content()); - messages.add(m); - } - - // 현재 사용자 입력 추가 - JsonObject userMsg = new JsonObject(); - userMsg.addProperty("role", "user"); - userMsg.addProperty("content", userText); - messages.add(userMsg); - - requestBody.add("messages", messages); - - // Bedrock 호출 - InvokeModelResponse response = AwsClients.bedrock().invokeModel( - InvokeModelRequest.builder() - .modelId(MODEL_ID) - .contentType("application/json") - .body(SdkBytes.fromUtf8String(requestBody.toString())) - .build() - ); - - // 응답 파싱 - JsonObject result = JsonParser.parseString( - response.body().asUtf8String() - ).getAsJsonObject(); - - return result.getAsJsonArray("content") - .get(0).getAsJsonObject() - .get("text").getAsString(); - } - - /** - * 레벨별 시스템 프롬프트 생성 - */ - private String buildSystemPrompt(String targetLevel) { - String levelGuidance = 
switch (targetLevel.toUpperCase()) { - case "BEGINNER" -> """ + + private static final Logger logger = LoggerFactory.getLogger(SpeakingService.class); + private static final Gson gson = new GsonBuilder().create(); + + private static final String MODEL_ID = "anthropic.claude-3-haiku-20240307-v1:0"; + private static final int MAX_TOKENS = 500; + private static final int MAX_HISTORY_SIZE = 10; // keep only the latest 10 turns + + private final TranscribeProxyService transcribeService; + private final PollyService pollyService; + private final SpeakingSessionRepository sessionRepository; + + /** + * Looks up the session, or creates a new one when no ID is given, the ID is unknown, or the stored session belongs to a different user. + */ + public SpeakingSession getOrCreateSession(String sessionId, String userId, String level) { + if (sessionId != null && !sessionId.isEmpty()) { + return sessionRepository.findBySessionId(sessionId) + .filter(found -> userId != null && userId.equals(found.getUserId())).orElseGet(() -> createNewSession(userId, level)); + } + return createNewSession(userId, level); + } + + /** + * Creates and persists a new session under a random UUID. + */ + private SpeakingSession createNewSession(String userId, String level) { + String newSessionId = UUID.randomUUID().toString(); + SpeakingSession session = SpeakingSession.create(newSessionId, userId, level); + sessionRepository.save(session); + logger.info("New speaking session created: sessionId={}, userId={}", newSessionId, userId); + return session; + } + + public SpeakingService() { + this.transcribeService = new TranscribeProxyService(); + this.pollyService = new PollyService( + EnvConfig.getRequired("BUCKET_NAME"), + "speaking/voice/" + ); + this.sessionRepository = new SpeakingSessionRepository(); + } + + /** + * Handles voice input (full flow). + */ + public SpeakingResponse processVoiceInput(String sessionId, String userId, String audioBase64, String level) { + logger.info("Processing voice input for sessionId: {}", sessionId); + + // Look up or create the session + SpeakingSession session = getOrCreateSession(sessionId, userId, level); + + String targetLevel = session.getTargetLevel(); + + // STT: speech → text (via Transcribe Proxy) + logger.info("Step 1: Transcribing audio..."); + 
TranscribeProxyService.TranscribeResult sttResult = transcribeService.transcribe( + audioBase64, + session.getSessionId(), // resolved session ID; the request's sessionId may be null on a first call + "en-US" + ); + String userText = sttResult.transcript(); + logger.info("Transcription complete: {} (confidence: {})", userText, sttResult.confidence()); + + // Load conversation history + List history = parseHistory(session.getConversationHistory()); + + // Bedrock: generate AI response + logger.info("Step 2: Generating AI response..."); + String aiResponse = generateAiResponse(userText, history, targetLevel); + logger.info("AI response generated: {}", aiResponse); + + // Update history (keep only the latest N turns) + history.add(new Message("user", userText)); + history.add(new Message("assistant", aiResponse)); + if (history.size() > MAX_HISTORY_SIZE * 2) { + history = new ArrayList<>(history.subList(history.size() - MAX_HISTORY_SIZE * 2, history.size())); + } + session.setConversationHistory(toJson(history)); + sessionRepository.update(session); + + // TTS: text → speech (via Polly) + logger.info("Step 3: Synthesizing speech..."); + String audioId = session.getSessionId() + "_" + System.currentTimeMillis(); + PollyService.VoiceSynthesisResult ttsResult = pollyService.synthesizeSpeech( + audioId, + aiResponse, + "FEMALE" + ); + logger.info("Speech synthesis complete: cached={}", ttsResult.isCached()); + + return new SpeakingResponse( + session.getSessionId(), + userText, + aiResponse, + ttsResult.getAudioUrl(), + sttResult.confidence() + ); + } + + /** + * Handles text input (text only, no audio). + */ + public SpeakingResponse processTextInput(String sessionId, String userId, String userText, String level){ + logger.info("Processing text input for sessionId: {}", sessionId); + + // Look up or create the session + SpeakingSession session = getOrCreateSession(sessionId, userId, level); + + // Load conversation history + List history = parseHistory(session.getConversationHistory()); + + // Generate AI response + String aiResponse = generateAiResponse(userText, history, session.getTargetLevel()); + + // Update history + history.add(new Message("user", userText)); + 
history.add(new Message("assistant", aiResponse)); + if (history.size() > MAX_HISTORY_SIZE * 2) { + history = new ArrayList<>(history.subList(history.size() - MAX_HISTORY_SIZE * 2, history.size())); + } + session.setConversationHistory(toJson(history)); + sessionRepository.update(session); + + // TTS 생성 + String audioId = session.getSessionId() + "_" + System.currentTimeMillis(); + PollyService.VoiceSynthesisResult ttsResult = pollyService.synthesizeSpeech( + audioId, aiResponse, "FEMALE" + ); + + return new SpeakingResponse( + session.getSessionId(), + userText, + aiResponse, + ttsResult.getAudioUrl(), + 1.0 + ); + } + + /** + * 레벨 변경 + */ + public void updateLevel(String sessionId, String level) { + SpeakingSession session = sessionRepository.findBySessionId(sessionId) + .orElseThrow(() -> new RuntimeException("session not found: " + sessionId)); + + session.setTargetLevel(level.toUpperCase()); + sessionRepository.update(session); + logger.info("Level updated for sessionId {}: {}", sessionId, level); + } + + /** + * 대화 히스토리 초기화 + */ + public void resetConversation(String sessionId) { + SpeakingSession session = sessionRepository.findBySessionId(sessionId) + .orElseThrow(() -> new RuntimeException("session not found: " + sessionId)); + + session.setConversationHistory("[]"); + sessionRepository.update(session); + logger.info("Conversation reset for sessionId: {}", sessionId); + } + + + /** + * Bedrock Claude 호출하여 AI 응답 생성 + */ + private String generateAiResponse(String userText, List history, String targetLevel) { + String systemPrompt = buildSystemPrompt(targetLevel); + + JsonObject requestBody = new JsonObject(); + requestBody.addProperty("anthropic_version", "bedrock-2023-05-31"); + requestBody.addProperty("max_tokens", MAX_TOKENS); + requestBody.addProperty("system", systemPrompt); + + // 메시지 배열 구성 + JsonArray messages = new JsonArray(); + + // 기존 히스토리 추가 + for (Message msg : history) { + JsonObject m = new JsonObject(); + m.addProperty("role", msg.role()); + 
m.addProperty("content", msg.content()); + messages.add(m); + } + + // 현재 사용자 입력 추가 + JsonObject userMsg = new JsonObject(); + userMsg.addProperty("role", "user"); + userMsg.addProperty("content", userText); + messages.add(userMsg); + + requestBody.add("messages", messages); + + // Bedrock 호출 + InvokeModelResponse response = AwsClients.bedrock().invokeModel( + InvokeModelRequest.builder() + .modelId(MODEL_ID) + .contentType("application/json") + .body(SdkBytes.fromUtf8String(requestBody.toString())) + .build() + ); + + // 응답 파싱 + JsonObject result = JsonParser.parseString( + response.body().asUtf8String() + ).getAsJsonObject(); + + return result.getAsJsonArray("content") + .get(0).getAsJsonObject() + .get("text").getAsString(); + } + + /** + * 레벨별 시스템 프롬프트 생성 + */ + private String buildSystemPrompt(String targetLevel) { + String levelGuidance = switch (targetLevel.toUpperCase()) { + case "BEGINNER" -> """ - Use simple vocabulary and short sentences - Speak slowly and clearly - Use basic grammar structures - Provide Korean translations for difficult words in parentheses """; - case "ADVANCED" -> """ + case "ADVANCED" -> """ - Use sophisticated vocabulary and complex sentences - Include idiomatic expressions and phrasal verbs - Discuss abstract concepts naturally - Challenge the user with nuanced topics """; - default -> """ + default -> """ - Use moderate vocabulary appropriate for intermediate learners - Mix simple and compound sentences - Introduce useful expressions gradually - Balance challenge with accessibility """; - }; - - return String.format(""" + }; + + return String.format(""" You are a friendly English conversation partner for Korean learners. Your name is "Amy" and you're an American English teacher living in Seoul. @@ -258,61 +286,60 @@ private String buildSystemPrompt(String targetLevel) { Remember: Your goal is to make the user feel comfortable practicing English! 
""", targetLevel, levelGuidance); - } - - /** - * 히스토리 JSON 파싱 - */ - private List parseHistory(String historyJson) { - List history = new ArrayList<>(); - - if (historyJson == null || historyJson.isEmpty() || historyJson.equals("[]")) { - return history; - } - - try { - JsonArray array = JsonParser.parseString(historyJson).getAsJsonArray(); - for (JsonElement el : array) { - JsonObject obj = el.getAsJsonObject(); - history.add(new Message( - obj.get("role").getAsString(), - obj.get("content").getAsString() - )); - } - } catch (Exception e) { - logger.warn("Failed to parse history, starting fresh: {}", e.getMessage()); - } - - return history; - } - - /** - * 히스토리 JSON 변환 - */ - private String toJson(List history) { - JsonArray array = new JsonArray(); - for (Message msg : history) { - JsonObject obj = new JsonObject(); - obj.addProperty("role", msg.role()); - obj.addProperty("content", msg.content()); - array.add(obj); - } - return array.toString(); - } - - // ==================== Inner Classes ==================== - - private record Message(String role, String content) { - } - - /** - * Speaking 응답 DTO - */ - public record SpeakingResponse( - String userTranscript, // 사용자가 말한 내용 (STT 결과) - String aiText, // AI 응답 텍스트 - String aiAudioUrl, // AI 응답 음성 URL (Polly) - double confidence // STT 신뢰도comp - ) { - } + } + + /** + * 히스토리 JSON 파싱 + */ + private List parseHistory(String historyJson) { + List history = new ArrayList<>(); + + if (historyJson == null || historyJson.isEmpty() || historyJson.equals("[]")) { + return history; + } + + try { + JsonArray array = JsonParser.parseString(historyJson).getAsJsonArray(); + for (JsonElement el : array) { + JsonObject obj = el.getAsJsonObject(); + history.add(new Message( + obj.get("role").getAsString(), + obj.get("content").getAsString() + )); + } + } catch (Exception e) { + logger.warn("Failed to parse history, starting fresh: {}", e.getMessage()); + } + + return history; + } + + /** + * 히스토리 JSON 변환 + */ + private String 
toJson(List history) { + JsonArray array = new JsonArray(); + for (Message msg : history) { + JsonObject obj = new JsonObject(); + obj.addProperty("role", msg.role()); + obj.addProperty("content", msg.content()); + array.add(obj); + } + return array.toString(); + } + + // ==================== Inner Classes ==================== + + private record Message(String role, String content) {} + + /** + * Speaking 응답 DTO + */ + public record SpeakingResponse( + String sessionId, // 세션 ID (다음 요청에 사용) + String userTranscript, // 사용자가 말한 내용 (STT 결과) + String aiText, // AI 응답 텍스트 + String aiAudioUrl, // AI 응답 음성 URL (Polly) + double confidence // STT 신뢰도comp + ) {} } diff --git a/ServerlessFunction/template.yaml b/ServerlessFunction/template.yaml index 15fd4280..c846ea5e 100644 --- a/ServerlessFunction/template.yaml +++ b/ServerlessFunction/template.yaml @@ -1434,6 +1434,62 @@ Resources: Description: Daily word learning stats aggregation Enabled: true + ############################################# + # Speaking REST API (AI와 대화하기) + ############################################# + + SpeakingFunction: + Type: AWS::Serverless::Function + Properties: + FunctionName: group2-englishstudy-speaking-handler + CodeUri: . 
+ Handler: com.mzc.secondproject.serverless.domain.speaking.handler.websocket.SpeakingHandler::handleRequest + Description: Handle Speaking AI conversation (REST API) + Timeout: 120 + MemorySize: 1024 + SnapStart: + ApplyOn: PublishedVersions + Environment: + Variables: + TRANSCRIBE_API_KEY: "/opic/transcribe-proxy-api-key" + Policies: + - DynamoDBCrudPolicy: + TableName: !Ref ChatTable + - S3CrudPolicy: + BucketName: group2-englishstudy + - Statement: + - Effect: Allow + Action: + - bedrock:InvokeModel + Resource: "*" + - Statement: + - Effect: Allow + Action: + - polly:SynthesizeSpeech + Resource: "*" + - Statement: + - Effect: Allow + Action: + - ssm:GetParameter + Resource: !Sub "arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/opic/*" + Events: + SpeakingChat: + Type: Api + Properties: + RestApiId: !Ref MainApi + Path: /api/speaking/chat + Method: POST + Auth: + Authorizer: CognitoAuthorizer + SpeakingReset: + Type: Api + Properties: + RestApiId: !Ref MainApi + Path: /api/speaking/reset + Method: POST + Auth: + Authorizer: CognitoAuthorizer + ############################################# # OPIc Lambda Functions #############################################