From 2c7b2d8684de724085fec86be2de5fed629148e2 Mon Sep 17 00:00:00 2001
From: ddingjoo
Date: Thu, 22 Jan 2026 13:38:24 +0900
Subject: [PATCH] =?UTF-8?q?feat(news):=20AI=20=EB=89=B4=EC=8A=A4=20?=
 =?UTF-8?q?=EB=B6=84=EC=84=9D=20=EC=8B=9C=EC=8A=A4=ED=85=9C=20=EA=B5=AC?=
 =?UTF-8?q?=ED=98=84=20(#387)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- NewsAnalysisService: AI 분석 통합 서비스
  - Bedrock: CEFR 난이도 분석 (A1~C2)
  - Bedrock: 3줄 요약 + 퀴즈 3문제 생성
  - Comprehend: 핵심 키워드 추출
- NewsCollectorService: 수집 시 자동 분석 연동
- GSI1/GSI2 키 자동 설정 (레벨별, 카테고리별 조회)
---
 .../news/service/NewsAnalysisService.java     | 394 ++++++++++++++++++
 .../news/service/NewsCollectorService.java    |  16 +-
 2 files changed, 406 insertions(+), 4 deletions(-)
 create mode 100644 ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/news/service/NewsAnalysisService.java

diff --git a/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/news/service/NewsAnalysisService.java b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/news/service/NewsAnalysisService.java
new file mode 100644
index 00000000..af23fc5b
--- /dev/null
+++ b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/news/service/NewsAnalysisService.java
@@ -0,0 +1,394 @@
+package com.mzc.secondproject.serverless.domain.news.service;
+
+import com.google.gson.Gson;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.mzc.secondproject.serverless.common.config.AwsClients;
+import com.mzc.secondproject.serverless.domain.news.model.KeywordInfo;
+import com.mzc.secondproject.serverless.domain.news.model.NewsArticle;
+import com.mzc.secondproject.serverless.domain.news.model.QuizQuestion;
+import com.mzc.secondproject.serverless.domain.news.repository.NewsArticleRepository;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.core.SdkBytes;
+import software.amazon.awssdk.services.bedrockruntime.model.InvokeModelRequest;
+import software.amazon.awssdk.services.bedrockruntime.model.InvokeModelResponse;
+import software.amazon.awssdk.services.comprehend.model.*;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.stream.Collectors;
+
+/**
+ * AI analysis service for news articles.
+ * - CEFR difficulty analysis (Bedrock)
+ * - Three-line summary generation (Bedrock)
+ * - Key phrase extraction (Comprehend)
+ * - Quiz generation (Bedrock)
+ */
+public class NewsAnalysisService {
+
+    private static final Logger logger = LoggerFactory.getLogger(NewsAnalysisService.class);
+    private static final Gson gson = new Gson();
+    private static final String MODEL_ID = "anthropic.claude-3-haiku-20240307-v1:0";
+    private static final String DEFAULT_CEFR_LEVEL = "B1";
+    // Hardest level first: the substring fallback in analyzeDifficulty scans in this order.
+    private static final List<String> CEFR_LEVELS = List.of("C2", "C1", "B2", "B1", "A2", "A1");
+
+    private final NewsArticleRepository articleRepository;
+
+    public NewsAnalysisService() {
+        this.articleRepository = new NewsArticleRepository();
+    }
+
+    public NewsAnalysisService(NewsArticleRepository articleRepository) {
+        this.articleRepository = articleRepository;
+    }
+
+    /**
+     * Analyzes an article end to end, stores the results on it, and saves it.
+     *
+     * <p>Every analysis step is best-effort: a failed step falls back to a default
+     * (CEFR {@code B1}, empty keyword/quiz lists) instead of aborting the others, so the
+     * GSI keys are always populated before the single save. An exception thrown by the
+     * repository save itself is not caught here.
+     *
+     * @param article the article to enrich; it is mutated in place
+     * @return the same {@code article} instance
+     */
+    public NewsArticle analyzeArticle(NewsArticle article) {
+        logger.info("뉴스 분석 시작: {}", article.getArticleId());
+        long startTime = System.currentTimeMillis();
+
+        String content = buildContent(article);
+
+        // 1. CEFR difficulty; a Bedrock failure must not block indexing or saving.
+        String cefrLevel;
+        try {
+            cefrLevel = analyzeDifficulty(content);
+        } catch (Exception e) {
+            logger.error("뉴스 분석 실패: {}", article.getArticleId(), e);
+            cefrLevel = DEFAULT_CEFR_LEVEL;
+        }
+        article.setCefrLevel(cefrLevel);
+        article.setLevel(mapCefrToLevel(cefrLevel));
+
+        // 2. Key phrases (Comprehend); returns an empty list on failure.
+        article.setKeywords(extractKeywords(content));
+
+        // 3. Summary and quiz in a single Bedrock call; returns empty lists on failure.
+        AnalysisResult result = generateSummaryAndQuiz(content, cefrLevel);
+        if (result.summary() != null && !result.summary().isBlank()) {
+            article.setSummary(result.summary());
+        }
+        article.setQuiz(result.quiz());
+        article.setHighlightWords(result.highlightWords());
+
+        // 4. GSI keys, set on the fallback path too so the article stays queryable.
+        article.setGsi1pk("LEVEL#" + article.getLevel());
+        article.setGsi1sk(article.getPublishedAt());
+        if (article.getCategory() != null) {
+            article.setGsi2pk("CATEGORY#" + article.getCategory());
+            article.setGsi2sk(article.getPublishedAt());
+        }
+
+        // 5. Persist exactly once.
+        articleRepository.save(article);
+
+        long elapsed = System.currentTimeMillis() - startTime;
+        logger.info("뉴스 분석 완료: {} ({}ms)", article.getArticleId(), elapsed);
+        return article;
+    }
+
+    /**
+     * Builds the text sent to the models: the title followed by the feed summary.
+     * A missing title or summary contributes an empty string rather than the text "null".
+     */
+    private String buildContent(NewsArticle article) {
+        String title = article.getTitle() != null ? article.getTitle() : "";
+        String summary = article.getSummary() != null ? article.getSummary() : "";
+        return title + ". " + summary;
+    }
+
+    /**
+     * Asks Bedrock for the CEFR level of the text.
+     *
+     * @return one of A1..C2, or B1 when the reply contains no recognizable level
+     */
+    private String analyzeDifficulty(String content) {
+        String systemPrompt = """
+                You are an English language expert. Analyze the text and determine its CEFR level.
+                Consider vocabulary complexity, sentence structure, and topic familiarity.
+
+                Respond with ONLY the CEFR level code: A1, A2, B1, B2, C1, or C2
+                No explanation, just the level code.
+                """;
+
+        String userPrompt = "Determine the CEFR level of this text:\n\n" + truncate(content, 1000);
+
+        // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
+        String reply = invokeBedrock(systemPrompt, userPrompt).trim().toUpperCase(Locale.ROOT);
+
+        // Exact answer such as "B2".
+        if (CEFR_LEVELS.contains(reply)) {
+            return reply;
+        }
+
+        // Otherwise look for a level code embedded in a longer reply.
+        for (String level : CEFR_LEVELS) {
+            if (reply.contains(level)) {
+                return level;
+            }
+        }
+
+        return DEFAULT_CEFR_LEVEL;
+    }
+
+    /**
+     * Maps a CEFR level to the three-tier level; unknown codes map to INTERMEDIATE.
+     */
+    private String mapCefrToLevel(String cefrLevel) {
+        return switch (cefrLevel) {
+            case "A1", "A2" -> "BEGINNER";
+            case "B1", "B2" -> "INTERMEDIATE";
+            case "C1", "C2" -> "ADVANCED";
+            default -> "INTERMEDIATE";
+        };
+    }
+
+    /**
+     * Extracts key phrases with Comprehend.
+     *
+     * <p>Only the first 10 phrases of the response are considered, and of those only the
+     * ones scoring above 0.8 are kept. Returns an empty list if the call fails.
+     */
+    private List<KeywordInfo> extractKeywords(String content) {
+        try {
+            DetectKeyPhrasesResponse response = AwsClients.comprehend().detectKeyPhrases(
+                    DetectKeyPhrasesRequest.builder()
+                            .text(truncate(content, 5000))
+                            .languageCode(LanguageCode.EN)
+                            .build()
+            );
+
+            List<KeywordInfo> keywords = new ArrayList<>();
+            List<KeyPhrase> phrases = response.keyPhrases();
+
+            for (int i = 0; i < Math.min(phrases.size(), 10); i++) {
+                KeyPhrase phrase = phrases.get(i);
+                if (phrase.score() != null && phrase.score() > 0.8) {
+                    keywords.add(KeywordInfo.builder()
+                            .word(phrase.text())
+                            .position(i) // index within the Comprehend response, not a text offset
+                            .build());
+                }
+            }
+
+            return keywords;
+
+        } catch (Exception e) {
+            logger.error("키워드 추출 실패", e);
+            return new ArrayList<>();
+        }
+    }
+
+    /**
+     * Generates the summary, highlight words and quiz with one Bedrock call.
+     * Returns a result with a null summary and empty lists if the call or parsing fails.
+     */
+    private AnalysisResult generateSummaryAndQuiz(String content, String cefrLevel) {
+        String systemPrompt = """
+                You are an English learning assistant. Analyze the news article and create learning materials.
+
+                Respond in this exact JSON format:
+                {
+                  "summary": "3-line summary in English (each line separated by newline)",
+                  "highlightWords": ["word1", "word2", "word3"],
+                  "quiz": [
+                    {
+                      "questionId": "q1",
+                      "type": "COMPREHENSION",
+                      "question": "What is the main topic of this article?",
+                      "options": ["Option A", "Option B", "Option C", "Option D"],
+                      "correctAnswer": "Option A",
+                      "points": 20
+                    },
+                    {
+                      "questionId": "q2",
+                      "type": "WORD_MATCH",
+                      "question": "What does 'X' mean in this context?",
+                      "options": ["meaning1", "meaning2", "meaning3", "meaning4"],
+                      "correctAnswer": "meaning1",
+                      "points": 15
+                    },
+                    {
+                      "questionId": "q3",
+                      "type": "FILL_BLANK",
+                      "question": "The article mentions that _____ is important.",
+                      "options": ["word1", "word2", "word3", "word4"],
+                      "correctAnswer": "word1",
+                      "points": 30
+                    }
+                  ]
+                }
+
+                Create exactly 3 quiz questions.
+                highlightWords should contain 3-5 difficult words for learners.
+                Adjust difficulty based on CEFR level: """ + cefrLevel;
+
+        String userPrompt = "Create learning materials for this article:\n\n" + truncate(content, 1500);
+
+        try {
+            String response = invokeBedrock(systemPrompt, userPrompt);
+            return parseAnalysisResult(response);
+        } catch (Exception e) {
+            logger.error("요약/퀴즈 생성 실패", e);
+            return new AnalysisResult(null, new ArrayList<>(), new ArrayList<>());
+        }
+    }
+
+    /**
+     * Sends one user message with a system prompt to the Bedrock model.
+     *
+     * @return the text of the first content element of the reply
+     * @throws IllegalStateException if the reply carries no text content
+     */
+    private String invokeBedrock(String systemPrompt, String userPrompt) {
+        JsonObject requestBody = new JsonObject();
+        requestBody.addProperty("anthropic_version", "bedrock-2023-05-31");
+        requestBody.addProperty("max_tokens", 2000);
+        requestBody.addProperty("system", systemPrompt);
+
+        JsonArray messages = new JsonArray();
+        JsonObject userMessage = new JsonObject();
+        userMessage.addProperty("role", "user");
+        userMessage.addProperty("content", userPrompt);
+        messages.add(userMessage);
+        requestBody.add("messages", messages);
+
+        InvokeModelRequest request = InvokeModelRequest.builder()
+                .modelId(MODEL_ID)
+                .contentType("application/json")
+                .accept("application/json")
+                .body(SdkBytes.fromUtf8String(gson.toJson(requestBody)))
+                .build();
+
+        InvokeModelResponse response = AwsClients.bedrock().invokeModel(request);
+        JsonObject jsonResponse = gson.fromJson(response.body().asUtf8String(), JsonObject.class);
+
+        // Guard each level so a malformed reply raises the descriptive error below, not an NPE.
+        JsonArray contentArray = jsonResponse != null ? jsonResponse.getAsJsonArray("content") : null;
+        if (contentArray != null && !contentArray.isEmpty() && contentArray.get(0).isJsonObject()) {
+            String text = stringOrDefault(contentArray.get(0).getAsJsonObject(), "text", null);
+            if (text != null) {
+                return text;
+            }
+        }
+
+        throw new IllegalStateException("Empty response from Bedrock");
+    }
+
+    /**
+     * Parses the model's JSON reply into an {@link AnalysisResult}.
+     * Missing, null or wrongly typed fields fall back to defaults instead of failing the whole parse.
+     */
+    private AnalysisResult parseAnalysisResult(String response) {
+        JsonObject json = gson.fromJson(extractJson(response), JsonObject.class);
+        if (json == null) {
+            return new AnalysisResult(null, new ArrayList<>(), new ArrayList<>());
+        }
+
+        String summary = stringOrDefault(json, "summary", null);
+        List<String> highlightWords = stringList(json, "highlightWords");
+
+        List<QuizQuestion> quiz = new ArrayList<>();
+        JsonElement quizElement = json.get("quiz");
+        if (quizElement != null && quizElement.isJsonArray()) {
+            for (JsonElement element : quizElement.getAsJsonArray()) {
+                if (!element.isJsonObject()) {
+                    continue;
+                }
+                JsonObject q = element.getAsJsonObject();
+                quiz.add(QuizQuestion.builder()
+                        .questionId(stringOrDefault(q, "questionId", null))
+                        .type(stringOrDefault(q, "type", "COMPREHENSION"))
+                        .question(stringOrDefault(q, "question", ""))
+                        .options(stringList(q, "options"))
+                        .correctAnswer(stringOrDefault(q, "correctAnswer", ""))
+                        .points(intOrDefault(q, "points", 20))
+                        .build());
+            }
+        }
+
+        return new AnalysisResult(summary, highlightWords, quiz);
+    }
+
+    /** Returns the primitive value under {@code key} as a string, or {@code fallback}. */
+    private static String stringOrDefault(JsonObject obj, String key, String fallback) {
+        JsonElement value = obj.get(key);
+        return value != null && value.isJsonPrimitive() ? value.getAsString() : fallback;
+    }
+
+    /** Returns the numeric value under {@code key} as an int, or {@code fallback}. */
+    private static int intOrDefault(JsonObject obj, String key, int fallback) {
+        JsonElement value = obj.get(key);
+        boolean numeric = value != null && value.isJsonPrimitive() && value.getAsJsonPrimitive().isNumber();
+        return numeric ? value.getAsInt() : fallback;
+    }
+
+    /** Returns the primitive elements of the array under {@code key} as strings; never null. */
+    private static List<String> stringList(JsonObject obj, String key) {
+        List<String> values = new ArrayList<>();
+        JsonElement array = obj.get(key);
+        if (array != null && array.isJsonArray()) {
+            for (JsonElement item : array.getAsJsonArray()) {
+                if (item.isJsonPrimitive()) {
+                    values.add(item.getAsString());
+                }
+            }
+        }
+        return values;
+    }
+
+    /**
+     * Returns the span from the first '{' to the last '}' of the reply, or the reply
+     * unchanged when no such span exists.
+     */
+    private String extractJson(String response) {
+        int start = response.indexOf('{');
+        int end = response.lastIndexOf('}');
+        if (start != -1 && end != -1 && end > start) {
+            return response.substring(start, end + 1);
+        }
+        return response;
+    }
+
+    /**
+     * Cuts text to at most maxLength chars; null becomes "". The cut is moved back by one
+     * char when it would otherwise split a surrogate pair.
+     */
+    private String truncate(String text, int maxLength) {
+        if (text == null) {
+            return "";
+        }
+        if (text.length() <= maxLength) {
+            return text;
+        }
+        int end = maxLength;
+        if (end > 0 && Character.isHighSurrogate(text.charAt(end - 1))) {
+            end--;
+        }
+        return text.substring(0, end);
+    }
+
+    /**
+     * Parsed result of the summary/quiz call.
+     */
+    private record AnalysisResult(
+            String summary,
+            List<String> highlightWords,
+            List<QuizQuestion> quiz
+    ) {}
+}
diff --git a/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/news/service/NewsCollectorService.java b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/news/service/NewsCollectorService.java
index 9842f4b3..ecac47df 100644
--- a/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/news/service/NewsCollectorService.java
+++ b/ServerlessFunction/src/main/java/com/mzc/secondproject/serverless/domain/news/service/NewsCollectorService.java
@@ -27,19 +27,23 @@ public class NewsCollectorService {
     private final RssFeedParser rssFeedParser;
     private final NewsDuplicateChecker duplicateChecker;
     private final NewsArticleRepository articleRepository;
+    private final NewsAnalysisService analysisService;
 
     public NewsCollectorService() {
         this.rssFeedParser = new RssFeedParser();
         this.duplicateChecker = new NewsDuplicateChecker();
         this.articleRepository = new NewsArticleRepository();
+        this.analysisService = new NewsAnalysisService();
     }
 
     public NewsCollectorService(RssFeedParser rssFeedParser,
                                 NewsDuplicateChecker duplicateChecker,
-                                NewsArticleRepository articleRepository) {
+                                NewsArticleRepository articleRepository,
+                                NewsAnalysisService analysisService) {
         this.rssFeedParser = rssFeedParser;
         this.duplicateChecker = duplicateChecker;
         this.articleRepository = articleRepository;
+        this.analysisService = analysisService;
     }
 
     /**
@@ -62,18 +66,22 @@ public CollectionResult collectNews() {
         logger.info("중복 제거 후 {}개 기사", uniqueArticles.size());
 
         int savedCount = 0;
+        int analyzedCount = 0;
         for (RawNewsArticle rawArticle : uniqueArticles) {
             try {
                 NewsArticle article = convertToNewsArticle(rawArticle);
-                articleRepository.save(article);
+
+                // AI 분석 수행 (난이도, 요약, 키워드, 퀴즈)
+                analysisService.analyzeArticle(article);
+                analyzedCount++;
                 savedCount++;
             } catch (Exception e) {
-                logger.error("기사 저장 실패: {}", rawArticle.getTitle(), e);
+                logger.error("기사 처리 실패: {}", rawArticle.getTitle(), e);
             }
         }
 
         long elapsed = System.currentTimeMillis() - startTime;
-        logger.info("뉴스 수집 완료 - 저장: {}, 소요시간: {}ms", savedCount, elapsed);
+        logger.info("뉴스 수집/분석 완료 - 저장: {}, 분석: {}, 소요시간: {}ms", savedCount, analyzedCount, elapsed);
 
         return new CollectionResult(rssArticles.size(), savedCount, elapsed);
     }