diff --git a/src/aws/lambda/CrawlingService.ts b/src/aws/lambda/CrawlingService.ts index 5d49b17..79631ed 100644 --- a/src/aws/lambda/CrawlingService.ts +++ b/src/aws/lambda/CrawlingService.ts @@ -3,7 +3,7 @@ import { chromium } from 'playwright-core'; import { JobRegistry } from '../../entity/job/JobRegistry'; import { Job } from '../../entity/job/Job'; import { JobExecutor } from '../../entity/job/JobExecutor'; -import { getKoreaTimeISO } from '../../utils/DateUtils'; +import { getKoreaTimeISO, formatKoreaDateISO } from '../../utils/DateUtils'; import { validateJobName } from './LambdaEventValidator'; import { TargetDate } from '../../entity/TargetDate'; import { HandleErrors } from '../../utils/ErrorHandling'; @@ -46,18 +46,80 @@ export class CrawlingService { const endTime = Date.now(); console.log(`Crawling completed in ${endTime - startTime}ms`); + + const crawlingResult = this.transformResults(executionResult, job.jobName); - return this.createCrawlingResult(executionResult); + console.log(`스크래핑 결과, items: ${crawlingResult.results.length}`); + + return crawlingResult; } finally { await this.cleanup(); } } - private createCrawlingResult(executionResult: { processedJobs: string[]; results: any[]; itemCount: number }): CrawlingResult { + /** + * 기존 중첩 구조를 Spring Batch JsonItemReader가 읽을 수 있는 평면 배열로 변환 + * 기존: { '기관명': { 'notice': [...], 'recruit': [...] } } + * 변환: [{ jobName: '기관명', category: 'notice', ...item }, ...] 
+ */ + private transformResults(result: Record, jobName: string): CrawlingResult { + const flatResults: any[] = []; + + if (!result) { + return this.createEmptyResult(jobName); + } + + for (const [institutionName, categories] of Object.entries(result)) { + if (typeof categories === 'object' && categories !== null) { + for (const [category, items] of Object.entries(categories)) { + if (Array.isArray(items)) { + items.forEach((item) => { + // Date 객체를 한국시간 문자열로 변환 + const transformedItem = this.convertDateFieldsToKoreaTime(item); + + flatResults.push({ + jobName, + institutionName, + category, + crawledAt: getKoreaTimeISO(), + ...transformedItem, + }); + }); + } + } + } + } + + console.log(`스크래핑 결과, items: ${flatResults.length}`); + + return { + processedJobs: [jobName], + results: flatResults, + itemCount: flatResults.length, + }; + } + + /** + * 객체 내의 Date 필드들을 한국시간 ISO 문자열로 변환 + */ + private convertDateFieldsToKoreaTime(item: any): any { + const converted = { ...item }; + + // 일반적으로 사용되는 Date 필드들을 확인하고 변환 + for (const [key, value] of Object.entries(converted)) { + if (value instanceof Date) { + converted[key] = formatKoreaDateISO(value); + } + } + + return converted; + } + + private createEmptyResult(jobName: string): CrawlingResult { return { - processedJobs: executionResult.processedJobs, - results: executionResult.results, - itemCount: executionResult.itemCount, + processedJobs: [jobName], + results: [], + itemCount: 0, }; } @@ -109,7 +171,7 @@ export class CrawlingService { } @HandleErrors(OPERATION_CONTEXT.JOB_EXECUTION, ERROR_MESSAGES.JOB_EXECUTION_FAILED) - private async executeJob(job: Job, context: { targetDate: Date }) { + private async executeJob(job: Job, context: { targetDate: Date }): Promise> { const result = await this.jobExecutor!.execute(job, context); return result; } diff --git a/src/entity/Category.ts b/src/entity/Category.ts index d80fdd3..9eed05b 100644 --- a/src/entity/Category.ts +++ b/src/entity/Category.ts @@ -1,6 +1,6 @@ export enum 
Category { - NOTICE = 'NOTICE', - WELFARE = 'WELFARE', - RECRUIT = 'RECRUIT', - EVENT = 'EVENT', + NOTICE = 'NOTICE', // 공지사항 + WELFARE = 'WELFARE', // 복지관소식 + RECRUIT = 'RECRUIT', // 채용 + EVENT = 'EVENT', // 행사/프로그램 } diff --git a/src/entity/job/JobExecutor.ts b/src/entity/job/JobExecutor.ts index 26668bd..83b1c41 100644 --- a/src/entity/job/JobExecutor.ts +++ b/src/entity/job/JobExecutor.ts @@ -3,7 +3,6 @@ import { Job } from './Job'; import { AppError } from '../../errors/AppError'; import { ERROR_MESSAGES } from '../../constants/ErrorMessages'; import { OPERATION_CONTEXT } from '../../constants/OperationContext'; -// Removed infra dependencies '../../utils/ErrorHandling'; export interface ExecutionContext { targetDate: Date; @@ -15,12 +14,6 @@ export interface PageOptions { timeout?: number; } -export interface JobExecutionResult { - processedJobs: string[]; - results: any[]; - itemCount: number; -} - /** * Job 실행을 담당하는 클래스 * 순수 도메인 클래스 - Job 실행과 결과 변환 @@ -34,7 +27,8 @@ export class JobExecutor { this.browser = browser; } - async execute(job: Job, context: ExecutionContext): Promise { + async execute(job: Job, context: ExecutionContext): Promise> { + console.log(`${job.jobName} Job 실행 시작`); let page: Page | null = null; @@ -42,15 +36,10 @@ export class JobExecutor { try { page = await this.createPage(context.pageOptions); const result = await job.run(page, context.targetDate); - const flatResults = this.transformResults(result, job.jobName); - console.log(`${job.jobName} Job 실행 성공, items: ${flatResults.length}`); + console.log(`${job.jobName} Job 실행 성공`); - return { - processedJobs: [job.jobName], - results: flatResults, - itemCount: flatResults.length, - }; + return result; } catch (error) { console.warn(`Job execution failed: ${job.jobName}`, error); throw new AppError( @@ -81,33 +70,4 @@ export class JobExecutor { return page; } - - /** - * 기존 중첩 구조를 Spring Batch JsonItemReader가 읽을 수 있는 평면 배열로 변환 - * 기존: { '기관명': { 'notice': [...], 'recruit': [...] 
} } - * 변환: [{ jobName: '기관명', category: 'notice', ...item }, ...] - */ - private transformResults(result: Record, jobName: string): any[] { - const flatResults: any[] = []; - - for (const [institutionName, categories] of Object.entries(result)) { - if (typeof categories === 'object' && categories !== null) { - for (const [category, items] of Object.entries(categories)) { - if (Array.isArray(items)) { - items.forEach((item) => { - flatResults.push({ - jobName, - institutionName, - category, - crawledAt: new Date().toISOString(), - ...item, - }); - }); - } - } - } - } - - return flatResults; - } } diff --git a/src/entity/job/JobRegistry.ts b/src/entity/job/JobRegistry.ts index 64a4720..0fa1ef5 100644 --- a/src/entity/job/JobRegistry.ts +++ b/src/entity/job/JobRegistry.ts @@ -1,6 +1,6 @@ import { Job } from './Job'; -import { 대한의료사회복지사협회 } from './implement/대한의료사회복지사협회'; -import { 한국노인인력개발원 } from './implement/한국노인인력개발원'; +import { 대한의료사회복지사협회 } from './implement/etc/대한의료사회복지사협회'; +import { 한국노인인력개발원 } from './implement/etc/한국노인인력개발원'; import { 경기도사회복지사협회 } from './implement/gyeonggi/경기도사회복지사협회'; import { 오정노인복지기관 } from './implement/gyeonggi/bucheon/부천시노인복지기관포털/오정노인복지관'; import { 원미노인복지관 } from './implement/gyeonggi/bucheon/부천시노인복지기관포털/원미노인복지관'; @@ -16,6 +16,9 @@ import { 인천광역시장애인종합복지관 } from './implement/incheon/인 import { 인천광역시사회복지사협회 } from './implement/incheon/인천광역시사회복지사협회'; import { 미추홀장애인종합복지관 } from './implement/incheon/미추홀장애인종합복지관'; import { 서울시사회복지사협회 } from './implement/seoul/서울시사회복지사협회'; +import { 거모종합사회복지관 } from './implement/gyeonggi/siheung/거모종합사회복지관'; +import { 고강종합사회복지관 } from './implement/gyeonggi/bucheon/고강종합사회복지관'; +import { 고산종합사회복지관 } from './implement/gyeonggi/uijeongbu/고산종합사회복지관'; /** * 모든 크롤링 Job들을 등록하고 관리하는 Registry 클래스 @@ -39,6 +42,9 @@ export class JobRegistry { new 인천광역시사회복지사협회(), new 미추홀장애인종합복지관(), new 서울시사회복지사협회(), + new 거모종합사회복지관(), + new 고강종합사회복지관(), + new 고산종합사회복지관(), ]; static getAllJobs(): Job[] { diff --git 
"a/src/entity/job/implement/\353\214\200\355\225\234\354\235\230\353\243\214\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" "b/src/entity/job/implement/etc/\353\214\200\355\225\234\354\235\230\353\243\214\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" similarity index 86% rename from "src/entity/job/implement/\353\214\200\355\225\234\354\235\230\353\243\214\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" rename to "src/entity/job/implement/etc/\353\214\200\355\225\234\354\235\230\353\243\214\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" index d067696..ee039a5 100644 --- "a/src/entity/job/implement/\353\214\200\355\225\234\354\235\230\353\243\214\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" +++ "b/src/entity/job/implement/etc/\353\214\200\355\225\234\354\235\230\353\243\214\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" @@ -1,9 +1,9 @@ -import { AbstractJob } from '../AbstractJob'; -import { SimpleTemplateStep } from '../../step/SimpleTemplateStep'; +import { AbstractJob } from '../../AbstractJob'; +import { SimpleTemplateStep } from '../../../step/SimpleTemplateStep'; import { Locator } from 'playwright-core'; -import { Category } from '../../Category'; -import { parseDate } from '../../../utils/DateUtils'; -import { Optimize, Optimizer } from '../../Optimize'; +import { Category } from '../../../Category'; +import { parseKoreaDate } from '../../../../utils/DateUtils'; +import { Optimize, Optimizer } from '../../../Optimize'; export class 대한의료사회복지사협회 extends AbstractJob { constructor() { @@ -31,7 +31,7 @@ class 공지사항 extends SimpleTemplateStep { const id = this.extractIdUsingStringMethods(link); const title = (await a.textContent()).trim(); const dateStr = (await card.locator('.time').textContent()).trim(); - const 
createdAt = parseDate(dateStr, '.'); + const createdAt = parseKoreaDate(dateStr, '.'); return { id: parseInt(id), @@ -65,7 +65,7 @@ class 채용 extends SimpleTemplateStep { const id = this.extractIdUsingStringMethods(link); const title = (await a.textContent()).trim(); const dateStr = (await card.locator('.time').textContent()).trim(); - const createdAt = parseDate(dateStr, '.'); + const createdAt = parseKoreaDate(dateStr, '.'); return { id: parseInt(id), diff --git "a/src/entity/job/implement/\355\225\234\352\265\255\353\205\270\354\235\270\354\235\270\353\240\245\352\260\234\353\260\234\354\233\220.ts" "b/src/entity/job/implement/etc/\355\225\234\352\265\255\353\205\270\354\235\270\354\235\270\353\240\245\352\260\234\353\260\234\354\233\220.ts" similarity index 81% rename from "src/entity/job/implement/\355\225\234\352\265\255\353\205\270\354\235\270\354\235\270\353\240\245\352\260\234\353\260\234\354\233\220.ts" rename to "src/entity/job/implement/etc/\355\225\234\352\265\255\353\205\270\354\235\270\354\235\270\353\240\245\352\260\234\353\260\234\354\233\220.ts" index c65302e..57a5bd3 100644 --- "a/src/entity/job/implement/\355\225\234\352\265\255\353\205\270\354\235\270\354\235\270\353\240\245\352\260\234\353\260\234\354\233\220.ts" +++ "b/src/entity/job/implement/etc/\355\225\234\352\265\255\353\205\270\354\235\270\354\235\270\353\240\245\352\260\234\353\260\234\354\233\220.ts" @@ -1,9 +1,9 @@ -import { AbstractJob } from '../AbstractJob'; -import { SimpleTemplateStep } from '../../step/SimpleTemplateStep'; -import { Category } from '../../Category'; +import { AbstractJob } from '../../AbstractJob'; +import { SimpleTemplateStep } from '../../../step/SimpleTemplateStep'; +import { Category } from '../../../Category'; import { Locator } from 'playwright-core'; -import { parseDate } from '../../../utils/DateUtils'; -import { Optimize, Optimizer } from '../../Optimize'; +import { parseKoreaDate } from '../../../../utils/DateUtils'; +import { Optimize, Optimizer } 
from '../../../Optimize'; export class 한국노인인력개발원 extends AbstractJob { constructor() { @@ -28,7 +28,7 @@ class 공지사항 extends SimpleTemplateStep { const title = (await a.textContent()).trim(); const dateStr = (await card.locator('td:nth-child(5)').textContent()).trim(); - const createdAt = parseDate(dateStr, '.'); + const createdAt = parseKoreaDate(dateStr, '.'); return { id: parseInt(id), @@ -51,7 +51,7 @@ class 채용 extends SimpleTemplateStep { const title = (await a.textContent()).trim(); const dateStr = (await card.locator('td:nth-child(5)').textContent()).trim(); - const createdAt = parseDate(dateStr, '.'); + const createdAt = parseKoreaDate(dateStr, '.'); return { id: parseInt(id), diff --git a/src/entity/job/implement/gyeonggi/bucheon/OnlyBucheonDefaultStep.ts b/src/entity/job/implement/gyeonggi/bucheon/OnlyBucheonDefaultStep.ts index a19914c..2517d7c 100644 --- a/src/entity/job/implement/gyeonggi/bucheon/OnlyBucheonDefaultStep.ts +++ b/src/entity/job/implement/gyeonggi/bucheon/OnlyBucheonDefaultStep.ts @@ -1,6 +1,6 @@ import { AbstractStep } from '../../../../step/AbstractStep'; import { Page } from 'playwright-core'; -import { parseDate } from '../../../../../utils/DateUtils'; +import { parseKoreaDate } from '../../../../../utils/DateUtils'; import { Category } from '../../../../Category'; export class OnlyBucheonDefaultStep extends AbstractStep { @@ -38,7 +38,7 @@ export class OnlyBucheonDefaultStep extends AbstractStep { const id = (await card.locator('.cell').first().textContent()).trim(); const title = (await card.locator('.title .tit').textContent()).trim(); - const createdAt: Date = parseDate((await card.locator('.date').textContent()).trim()); + const createdAt: Date = parseKoreaDate((await card.locator('.date').textContent()).trim()); const link = this.parseOnclick(await card.locator('.tit_cont').getAttribute('onclick')); list.push({ diff --git a/src/entity/job/implement/gyeonggi/bucheon/OnlyBucheonImageStep.ts 
b/src/entity/job/implement/gyeonggi/bucheon/OnlyBucheonImageStep.ts index 362cd2b..cbc3d85 100644 --- a/src/entity/job/implement/gyeonggi/bucheon/OnlyBucheonImageStep.ts +++ b/src/entity/job/implement/gyeonggi/bucheon/OnlyBucheonImageStep.ts @@ -1,7 +1,7 @@ import { AbstractStep } from '../../../../step/AbstractStep'; import { Category } from '../../../../Category'; import { Page } from 'playwright-core'; -import { parseDate, isEqualOrAfterDateOnly } from '../../../../../utils/DateUtils'; +import { parseKoreaDate, isEqualOrAfterDateOnly } from '../../../../../utils/DateUtils'; export class OnlyBucheonImageStep extends AbstractStep { private readonly category: Category; @@ -45,7 +45,7 @@ export class OnlyBucheonImageStep extends AbstractStep { ) .trim() .slice(0, 10); - const createdAt = parseDate(dateStr); + const createdAt = parseKoreaDate(dateStr); if (!isEqualOrAfterDateOnly(syncDate, createdAt)) { break; diff --git "a/src/entity/job/implement/gyeonggi/bucheon/\352\263\240\352\260\225\354\242\205\355\225\251\354\202\254\355\232\214\353\263\265\354\247\200\352\264\200.ts" "b/src/entity/job/implement/gyeonggi/bucheon/\352\263\240\352\260\225\354\242\205\355\225\251\354\202\254\355\232\214\353\263\265\354\247\200\352\264\200.ts" new file mode 100644 index 0000000..9e1dc46 --- /dev/null +++ "b/src/entity/job/implement/gyeonggi/bucheon/\352\263\240\352\260\225\354\242\205\355\225\251\354\202\254\355\232\214\353\263\265\354\247\200\352\264\200.ts" @@ -0,0 +1,48 @@ +import { Locator } from "playwright-core"; +import { Category } from "../../../../Category"; +import { Optimize, Optimizer } from "../../../../Optimize"; +import { MultiCategoryTemplateStep } from "../../../../step/MultiCategoryTemplateStep"; +import { AbstractJob } from "../../../AbstractJob"; +import { parseKoreaDate } from "../../../../../utils/DateUtils"; +import { classifyCategory } from "../../../../../utils/CategoryClassifier"; + +export class 고강종합사회복지관 extends AbstractJob { + constructor() { + 
super('고강종합사회복지관', 'https://gogangwc.or.kr', [new 알림마당()]) + } + + registerOptimizer(optimizer: Optimizer) { + optimizer.register(Optimize.JS); + } +} + +class 알림마당 extends MultiCategoryTemplateStep { + constructor() { + super( + 'https://gogangwc.tistory.com/category/%EC%95%8C%EB%A6%BC%EB%A7%88%EB%8B%B9', + '.index-item.article-item', + Category.NOTICE + ); + } + + async select(card: Locator, baseUrl: string): Promise { + const a = card.locator('.index-item-link'); + const link = await a.getAttribute('href'); + const title = (await card.locator('h3').textContent()).trim(); + + const dateStr = (await card.locator('.digit').textContent()).trim(); + const createdAt = parseKoreaDate(dateStr, '. '); + + return { + title: title, + createdAt: createdAt, + link: baseUrl + link, + } + } + + categorize(data: object): Category | null { + const { title } = data as { title: string }; + return classifyCategory(title); + } +} + diff --git "a/src/entity/job/implement/gyeonggi/siheung/\352\261\260\353\252\250\354\242\205\355\225\251\354\202\254\355\232\214\353\263\265\354\247\200\352\264\200.ts" "b/src/entity/job/implement/gyeonggi/siheung/\352\261\260\353\252\250\354\242\205\355\225\251\354\202\254\355\232\214\353\263\265\354\247\200\352\264\200.ts" new file mode 100644 index 0000000..cf67885 --- /dev/null +++ "b/src/entity/job/implement/gyeonggi/siheung/\352\261\260\353\252\250\354\242\205\355\225\251\354\202\254\355\232\214\353\263\265\354\247\200\352\264\200.ts" @@ -0,0 +1,97 @@ +import { Locator } from "playwright-core"; +import { Category } from "../../../../Category"; +import { Optimize, Optimizer } from "../../../../Optimize"; +import { MultiCategoryTemplateStep } from "../../../../step/MultiCategoryTemplateStep"; +import { AbstractJob } from "../../../AbstractJob"; +import { parseKoreaDate } from "../../../../../utils/DateUtils"; +import { classifyCategory } from "../../../../../utils/CategoryClassifier"; +import { SimpleTemplateStep } from 
"../../../../step/SimpleTemplateStep"; + +export class 거모종합사회복지관 extends AbstractJob { + constructor() { + super('거모종합사회복지관', 'http://geomo.or.kr/', [new 공지사항(), new 보도자료(), new 소식홍보지()]) + } + + registerOptimizer(optimizer: Optimizer) { + optimizer.register(Optimize.JS); + } +} + +class 공지사항 extends MultiCategoryTemplateStep { + constructor() { + super( + 'http://geomo.or.kr/bbs/board.php?bo_table=notice', + 'table tbody tr', + Category.NOTICE + ); + } + + async select(card: Locator, baseUrl: string): Promise { + const a = card.locator('.td_subject a'); + const link = await a.getAttribute('href'); + const title = (await a.textContent()).trim(); + + const dateStr = (await card.locator('.td_date').textContent()).trim(); + const createdAt = parseKoreaDate(dateStr, '-'); + return { + title: title, + createdAt: createdAt, + link: link, + } + } + + categorize(data: object): Category | null { + const { title } = data as { title: string }; + return classifyCategory(title); + } +} + +class 보도자료 extends SimpleTemplateStep { + constructor() { + super( + 'http://geomo.or.kr/bbs/board.php?bo_table=news', + 'table tbody tr', + Category.WELFARE + ); + } + + async select(card: Locator, baseUrl: string): Promise { + const a = card.locator('.td_subject a'); + const link = await a.getAttribute('href'); + const title = (await a.textContent()).trim(); + + const dateStr = (await card.locator('.td_date').textContent()).trim(); + const createdAt = parseKoreaDate(dateStr, '-'); + + return { + title: title, + createdAt: createdAt, + link: link, + }; + } +} + +class 소식홍보지 extends SimpleTemplateStep { + constructor() { + super( + 'http://geomo.or.kr/bbs/board.php?bo_table=promotion', + 'table tbody tr', + Category.WELFARE + ); + } + + async select(card: Locator, baseUrl: string): Promise { + const a = card.locator('.td_subject a'); + const link = await a.getAttribute('href'); + const title = (await a.textContent()).trim(); + + const dateStr = (await 
card.locator('.td_date').textContent()).trim(); + const createdAt = parseKoreaDate(dateStr, '-'); + + return { + title: title, + createdAt: createdAt, + link: link, + }; + } +} \ No newline at end of file diff --git "a/src/entity/job/implement/gyeonggi/uijeongbu/\352\263\240\354\202\260\354\242\205\355\225\251\354\202\254\355\232\214\353\263\265\354\247\200\352\264\200.ts" "b/src/entity/job/implement/gyeonggi/uijeongbu/\352\263\240\354\202\260\354\242\205\355\225\251\354\202\254\355\232\214\353\263\265\354\247\200\352\264\200.ts" new file mode 100644 index 0000000..1dab7a2 --- /dev/null +++ "b/src/entity/job/implement/gyeonggi/uijeongbu/\352\263\240\354\202\260\354\242\205\355\225\251\354\202\254\355\232\214\353\263\265\354\247\200\352\264\200.ts" @@ -0,0 +1,106 @@ +import { Locator } from "playwright-core"; +import { Category } from "../../../../Category"; +import { Optimize, Optimizer } from "../../../../Optimize"; +import { MultiCategoryTemplateStep } from "../../../../step/MultiCategoryTemplateStep"; +import { AbstractJob } from "../../../AbstractJob"; +import { SimpleTemplateStep } from "../../../../step/SimpleTemplateStep"; +import { classifyCategory } from "../../../../../utils/CategoryClassifier"; +import { parseKoreaDate } from "../../../../../utils/DateUtils"; + +export class 고산종합사회복지관 extends AbstractJob { + constructor() { + super('고산종합사회복지관', 'https://ujbgosan.or.kr', [new 공지사항_전체(), new 공지사항_안내(), new 공지사항_공고()]) + } + + registerOptimizer(optimizer: Optimizer) { + optimizer.register(Optimize.JS); + } +} + +class 공지사항_전체 extends MultiCategoryTemplateStep { + + constructor() { + super( + 'https://ujbgosan.or.kr/bbs/board.php?bo_table=notice', + '.list_01 ul li:not(.li_top)', + Category.NOTICE + ); + } + + async select(card: Locator, baseUrl: string): Promise { + const categoryElement = card.locator('.bo_cate_link'); + const categoryText = await categoryElement.textContent(); + + if (categoryText && categoryText.trim() !== '공지') { + return null; + } + + 
const a = card.locator('.subject a'); + const link = await a.getAttribute('href'); + const title = (await a.textContent()).trim(); + + const dateStr = (await card.locator('.bo_date').textContent()).trim(); + const createdAt = parseKoreaDate(dateStr, '-'); + + return { + title: title, + createdAt: createdAt, + link: link + } + } + + categorize(data: object): Category | null { + const { title } = data as { title: string }; + return classifyCategory(title); + } +} + +class 공지사항_안내 extends SimpleTemplateStep { + constructor() { + super( + 'https://ujbgosan.or.kr/bbs/board.php?bo_table=notice&sca=%EC%95%88%EB%82%B4', + '.list_01 ul li:not(.li_top)', + Category.WELFARE + ); + } + + async select(card: Locator, baseUrl: string): Promise { + const a = card.locator('.subject a'); + const link = await a.getAttribute('href'); + const title = (await a.textContent()).trim(); + + const dateStr = (await card.locator('.bo_date').textContent()).trim(); + const createdAt = parseKoreaDate(dateStr, '-'); + + return { + title: title, + createdAt: createdAt, + link: link + } + } +} + +class 공지사항_공고 extends SimpleTemplateStep { + constructor() { + super( + 'https://ujbgosan.or.kr/bbs/board.php?bo_table=notice&sca=%EA%B3%B5%EA%B3%A0', + '.list_01 ul li:not(.li_top)', + Category.RECRUIT + ); + } + + async select(card: Locator, baseUrl: string): Promise { + const a = card.locator('.subject a'); + const link = await a.getAttribute('href'); + const title = (await a.textContent()).trim(); + + const dateStr = (await card.locator('.bo_date').textContent()).trim(); + const createdAt = parseKoreaDate(dateStr, '-'); + + return { + title: title, + createdAt: createdAt, + link: link + } + } +} diff --git "a/src/entity/job/implement/gyeonggi/\352\262\275\352\270\260\353\217\204\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" 
"b/src/entity/job/implement/gyeonggi/\352\262\275\352\270\260\353\217\204\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" index e821283..45caa5b 100644 --- "a/src/entity/job/implement/gyeonggi/\352\262\275\352\270\260\353\217\204\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" +++ "b/src/entity/job/implement/gyeonggi/\352\262\275\352\270\260\353\217\204\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" @@ -2,7 +2,7 @@ import { AbstractJob } from '../../AbstractJob'; import { SimpleTemplateStep } from '../../../step/SimpleTemplateStep'; import { Locator } from 'playwright-core'; import { Category } from '../../../Category'; -import { parseDate } from '../../../../utils/DateUtils'; +import { parseKoreaDate } from '../../../../utils/DateUtils'; import { Optimize, Optimizer } from '../../../Optimize'; export class 경기도사회복지사협회 extends AbstractJob { @@ -32,7 +32,7 @@ class 공지사항 extends SimpleTemplateStep { const dateDiv = card.locator('.date_box'); const yearMonth = (await dateDiv.locator('span').textContent()).trim(); const day = (await dateDiv.locator('p').textContent()).trim(); - const createdAt = parseDate(yearMonth + '.' + day, '.'); + const createdAt = parseKoreaDate(yearMonth + '.' + day, '.'); return { id: parseInt(id), @@ -67,7 +67,7 @@ class 채용 extends SimpleTemplateStep { const dateDiv = card.locator('.date_box'); const yearMonth = (await dateDiv.locator('span').textContent()).trim(); const day = (await dateDiv.locator('p').textContent()).trim(); - const createdAt = parseDate(yearMonth + '.' + day, '.'); + const createdAt = parseKoreaDate(yearMonth + '.' 
+ day, '.'); return { id: parseInt(id), diff --git "a/src/entity/job/implement/incheon/\353\257\270\354\266\224\355\231\200\354\236\245\354\225\240\354\235\270\354\242\205\355\225\251\353\263\265\354\247\200\352\264\200.ts" "b/src/entity/job/implement/incheon/\353\257\270\354\266\224\355\231\200\354\236\245\354\225\240\354\235\270\354\242\205\355\225\251\353\263\265\354\247\200\352\264\200.ts" index d3a62b5..3d63306 100644 --- "a/src/entity/job/implement/incheon/\353\257\270\354\266\224\355\231\200\354\236\245\354\225\240\354\235\270\354\242\205\355\225\251\353\263\265\354\247\200\352\264\200.ts" +++ "b/src/entity/job/implement/incheon/\353\257\270\354\266\224\355\231\200\354\236\245\354\225\240\354\235\270\354\242\205\355\225\251\353\263\265\354\247\200\352\264\200.ts" @@ -2,7 +2,7 @@ import { AbstractJob } from '../../AbstractJob'; import { SimpleTemplateStep } from '../../../step/SimpleTemplateStep'; import { Locator, Page } from 'playwright-core'; import { Category } from '../../../Category'; -import { parseDate, isEqualOrAfterDateOnly } from '../../../../utils/DateUtils'; +import { parseKoreaDate, isEqualOrAfterDateOnly } from '../../../../utils/DateUtils'; import { AbstractStep } from '../../../step/AbstractStep'; import { Optimize, Optimizer } from '../../../Optimize'; @@ -59,7 +59,7 @@ class 미추홀소식 extends AbstractStep { ).trim(); // 2025-05-15 dateStr = ('20' + dateStr).slice(0, 10); - const createdAt = parseDate(dateStr, '-'); + const createdAt = parseKoreaDate(dateStr, '-'); if (!isEqualOrAfterDateOnly(syncDate, createdAt)) break; @@ -92,7 +92,7 @@ class 미추홀채용 extends SimpleTemplateStep { const a = card.locator('.td_subject > a'); const link = await a.getAttribute('href'); const title = (await a.textContent()).trim(); - const createdAt = parseDate((await card.locator('.td_date').textContent()).trim(), '-'); + const createdAt = parseKoreaDate((await card.locator('.td_date').textContent()).trim(), '-'); return { id: parseInt(id), @@ -117,7 +117,7 @@ class 
미추홀공지사항 extends SimpleTemplateStep { const a = card.locator('.td_subject > a'); const link = await a.getAttribute('href'); const title = (await a.textContent()).trim(); - const createdAt = parseDate((await card.locator('.td_date').textContent()).trim(), '-'); + const createdAt = parseKoreaDate((await card.locator('.td_date').textContent()).trim(), '-'); return { id: parseInt(id), diff --git "a/src/entity/job/implement/incheon/\354\235\270\354\262\234\352\264\221\354\227\255\354\213\234\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" "b/src/entity/job/implement/incheon/\354\235\270\354\262\234\352\264\221\354\227\255\354\213\234\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" index 033be34..ddaf2e2 100644 --- "a/src/entity/job/implement/incheon/\354\235\270\354\262\234\352\264\221\354\227\255\354\213\234\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" +++ "b/src/entity/job/implement/incheon/\354\235\270\354\262\234\352\264\221\354\227\255\354\213\234\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" @@ -2,7 +2,7 @@ import { AbstractJob } from '../../AbstractJob'; import { SimpleTemplateStep } from '../../../step/SimpleTemplateStep'; import { Category } from '../../../Category'; import { Locator } from 'playwright-core'; -import { parseDate } from '../../../../utils/DateUtils'; +import { parseKoreaDate } from '../../../../utils/DateUtils'; import { Optimize, Optimizer } from '../../../Optimize'; export class 인천광역시사회복지사협회 extends AbstractJob { @@ -34,7 +34,7 @@ class 행사및교육 extends SimpleTemplateStep { const id = params.get('idx'); let dateStr = '20' + (await card.locator('td:nth-child(3)').textContent()).trim(); - const createdAt = parseDate(dateStr, '.'); + const createdAt = parseKoreaDate(dateStr, '.'); return { id: parseInt(id), @@ -63,7 +63,7 @@ class 구인구직 extends SimpleTemplateStep { const id = 
params.get('idx'); let dateStr = '20' + (await card.locator('td:nth-child(4)').textContent()).trim(); - const createdAt = parseDate(dateStr, '.'); + const createdAt = parseKoreaDate(dateStr, '.'); return { id: parseInt(id), @@ -88,7 +88,7 @@ class 소식 extends SimpleTemplateStep { const id = params.get('idx'); let dateStr = '20' + (await card.locator('td:nth-child(3)').textContent()).trim(); - const createdAt = parseDate(dateStr, '.'); + const createdAt = parseKoreaDate(dateStr, '.'); return { id: parseInt(id), @@ -113,7 +113,7 @@ class 공지사항 extends SimpleTemplateStep { const id = params.get('idx'); let dateStr = '20' + (await card.locator('td:nth-child(3)').textContent()).trim(); - const createdAt = parseDate(dateStr, '.'); + const createdAt = parseKoreaDate(dateStr, '.'); return { id: parseInt(id), diff --git "a/src/entity/job/implement/incheon/\354\235\270\354\262\234\352\264\221\354\227\255\354\213\234\354\236\245\354\225\240\354\235\270\354\242\205\355\225\251\353\263\265\354\247\200\352\264\200.ts" "b/src/entity/job/implement/incheon/\354\235\270\354\262\234\352\264\221\354\227\255\354\213\234\354\236\245\354\225\240\354\235\270\354\242\205\355\225\251\353\263\265\354\247\200\352\264\200.ts" index 2feceeb..c48caba 100644 --- "a/src/entity/job/implement/incheon/\354\235\270\354\262\234\352\264\221\354\227\255\354\213\234\354\236\245\354\225\240\354\235\270\354\242\205\355\225\251\353\263\265\354\247\200\352\264\200.ts" +++ "b/src/entity/job/implement/incheon/\354\235\270\354\262\234\352\264\221\354\227\255\354\213\234\354\236\245\354\225\240\354\235\270\354\242\205\355\225\251\353\263\265\354\247\200\352\264\200.ts" @@ -2,7 +2,7 @@ import { AbstractJob } from '../../AbstractJob'; import { SimpleTemplateStep } from '../../../step/SimpleTemplateStep'; import { Locator } from 'playwright-core'; import { Category } from '../../../Category'; -import { parseDate } from '../../../../utils/DateUtils'; +import { parseKoreaDate } from '../../../../utils/DateUtils'; import 
{ Optimize, Optimizer } from '../../../Optimize'; export class 인천광역시장애인종합복지관 extends AbstractJob { @@ -30,7 +30,7 @@ class 인천채용 extends SimpleTemplateStep { let dateStr = (await card.locator('.wr-date').textContent()).trim(); - const createdAt = parseDate(this.getStartDate(dateStr)); + const createdAt = parseKoreaDate(this.getStartDate(dateStr)); return { id: parseInt(id), @@ -83,7 +83,7 @@ class 인천이벤트 extends SimpleTemplateStep { convertToDateFormat(timeString: string): Date { // 이미 yyyy.MM.dd 형식이면 그대로 반환 if (/^\d{4}\.\d{2}\.\d{2}$/.test(timeString)) { - return parseDate(timeString, '.'); + return parseKoreaDate(timeString, '.'); } const now = new Date(); @@ -103,7 +103,7 @@ class 인천이벤트 extends SimpleTemplateStep { } } - return parseDate(timeString, '.'); + return parseKoreaDate(timeString, '.'); } } class 인천공지사항 extends SimpleTemplateStep { @@ -128,7 +128,7 @@ class 인천공지사항 extends SimpleTemplateStep { } else { let createdAtStr = (await card.locator('.wr-date').textContent()).trim(); // 05.01 createdAtStr = this.inferYearFromDate(createdAtStr); // 2025.05.01 - createdAt = parseDate(createdAtStr, '.'); + createdAt = parseKoreaDate(createdAtStr, '.'); } return { diff --git "a/src/entity/job/implement/incheon/\354\235\270\354\262\234\354\242\205\355\225\251\354\202\254\355\232\214\353\263\265\354\247\200\352\264\200.ts" "b/src/entity/job/implement/incheon/\354\235\270\354\262\234\354\242\205\355\225\251\354\202\254\355\232\214\353\263\265\354\247\200\352\264\200.ts" index e7a71de..8489fb6 100644 --- "a/src/entity/job/implement/incheon/\354\235\270\354\262\234\354\242\205\355\225\251\354\202\254\355\232\214\353\263\265\354\247\200\352\264\200.ts" +++ "b/src/entity/job/implement/incheon/\354\235\270\354\262\234\354\242\205\355\225\251\354\202\254\355\232\214\353\263\265\354\247\200\352\264\200.ts" @@ -1,7 +1,7 @@ import { AbstractJob } from '../../AbstractJob'; import { AbstractStep } from '../../../step/AbstractStep'; import { Locator, Page } from 'playwright-core'; 
-import { parseDate } from '../../../../utils/DateUtils'; +import { parseKoreaDate } from '../../../../utils/DateUtils'; import { Category } from '../../../Category'; import { Optimizer } from '../../../Optimize'; @@ -40,7 +40,7 @@ class IncheonWelfare extends AbstractStep { ).trim(); const dateMatch = pText.match(/\d{4}\.\d{2}\.\d{2}/); const date = dateMatch ? dateMatch[0] : null; - const createdAt = parseDate(date, '.'); + const createdAt = parseKoreaDate(date, '.'); list.push({ id: Number(id), @@ -84,7 +84,7 @@ class IncheonEvent extends AbstractStep { ).trim(); const dateMatch = pText.match(/\d{4}\.\d{2}\.\d{2}/); const date = dateMatch ? dateMatch[0] : null; - const createdAt = parseDate(date, '.'); + const createdAt = parseKoreaDate(date, '.'); list.push({ id: Number(id), @@ -124,7 +124,7 @@ class IncheonNotice extends AbstractStep { const title = await titleBox.textContent(); const link = baseUrl + (await titleBox.getAttribute('href')); - const createdAt: Date = parseDate( + const createdAt: Date = parseKoreaDate( (await card.locator('.jDate').textContent()).trim(), '.' 
); diff --git "a/src/entity/job/implement/seoul/\354\204\234\354\232\270\354\213\234\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" "b/src/entity/job/implement/seoul/\354\204\234\354\232\270\354\213\234\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" index 98744a9..2b2c184 100644 --- "a/src/entity/job/implement/seoul/\354\204\234\354\232\270\354\213\234\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" +++ "b/src/entity/job/implement/seoul/\354\204\234\354\232\270\354\213\234\354\202\254\355\232\214\353\263\265\354\247\200\354\202\254\355\230\221\355\232\214.ts" @@ -1,6 +1,6 @@ import { AbstractJob } from '../../AbstractJob'; import { Locator } from 'playwright-core'; -import { parseDate } from '../../../../utils/DateUtils'; +import { parseKoreaDate } from '../../../../utils/DateUtils'; import { SimpleTemplateStep } from '../../../step/SimpleTemplateStep'; import { Category } from '../../../Category'; import { type } from 'node:os'; @@ -37,7 +37,7 @@ class SeoulEventStep extends SimpleTemplateStep { const link = baseUrl + (await a.getAttribute('href')); const title = (await a.textContent()).trim(); - const createdAt = parseDate((await card.locator('.time').textContent()).trim(), '.'); + const createdAt = parseKoreaDate((await card.locator('.time').textContent()).trim(), '.'); return { id: parseInt(id), @@ -62,7 +62,7 @@ class SeoulRecruitStep extends SimpleTemplateStep { const link = baseUrl + (await a.getAttribute('href')); const title = (await a.textContent()).trim(); - const createdAt = parseDate((await card.locator('.time').textContent()).trim(), '.'); + const createdAt = parseKoreaDate((await card.locator('.time').textContent()).trim(), '.'); return { id: parseInt(id), @@ -91,7 +91,7 @@ class SeoulNoticeStep extends SimpleTemplateStep { const link = baseUrl + (await a.getAttribute('href')); const title = (await a.textContent()).trim(); - const 
createdAt = parseDate((await card.locator('.time').textContent()).trim(), '.'); + const createdAt = parseKoreaDate((await card.locator('.time').textContent()).trim(), '.'); return { id: parseInt(id), diff --git a/src/entity/step/MultiCategoryTemplateStep.ts b/src/entity/step/MultiCategoryTemplateStep.ts new file mode 100644 index 0000000..9bb0eee --- /dev/null +++ b/src/entity/step/MultiCategoryTemplateStep.ts @@ -0,0 +1,56 @@ +import { AbstractStep } from './AbstractStep'; +import { Locator, Page } from 'playwright-core'; +import { Category } from '../Category'; + +export abstract class MultiCategoryTemplateStep extends AbstractStep { + private readonly url: string; + private readonly selectorAll: string; + private readonly defaultCategory: Category; + + constructor(url: string, selectorAll: string, defaultCategory: Category) { + super(); + this.url = url; + this.selectorAll = selectorAll; + this.defaultCategory = defaultCategory; + } + + async execute(page: Page, baseUrl: string, syncDate: Date): Promise> { + await page.goto(this.url); + + await page.waitForSelector(this.selectorAll, { + state: 'attached', + }); + + const cards = await page.locator(this.selectorAll).all(); + const result: Record = {}; + + for (const card of cards) { + const data = await this.select(card, baseUrl); + if (data == null) continue; + + const category = this.categorize(data) ?? 
this.defaultCategory; + + if (!result[category]) { + result[category] = []; + } + result[category].push(data); + } + + return result; + } + + /** + * 각 card에서 데이터를 추출하는 메서드 + * @param card 크롤링할 카드 요소 + * @param baseUrl 기본 URL + * @returns 추출된 데이터 객체 또는 null (스킵할 경우) + */ + abstract select(card: Locator, baseUrl: string): Promise; + + /** + * 추출된 데이터에서 카테고리를 분류하는 메서드 + * @param data select 메서드에서 반환된 데이터 + * @returns 분류된 카테고리 또는 null + */ + abstract categorize(data: object): Category | null; +} \ No newline at end of file diff --git a/src/entity/step/SimpleTemplateStep.ts b/src/entity/step/SimpleTemplateStep.ts index fcbc5bf..c618e6c 100644 --- a/src/entity/step/SimpleTemplateStep.ts +++ b/src/entity/step/SimpleTemplateStep.ts @@ -36,5 +36,11 @@ export abstract class SimpleTemplateStep extends AbstractStep { }; } + /** + * 각 card에서 데이터를 추출하는 메서드 + * @param card 크롤링할 카드 요소 + * @param baseUrl 기본 URL + * @returns 추출된 데이터 객체 + */ abstract select(card: Locator, baseUrl: string): Promise; } diff --git a/src/utils/CategoryClassifier.ts b/src/utils/CategoryClassifier.ts new file mode 100644 index 0000000..cae1150 --- /dev/null +++ b/src/utils/CategoryClassifier.ts @@ -0,0 +1,15 @@ +import { Category } from '../entity/Category'; + +export function classifyCategory(text: string): Category | null { + if (!text) return null; + + const categoryMap = [ + { keywords: ['채용'], category: Category.RECRUIT } + ]; + + const found = categoryMap.find(item => + item.keywords.some(keyword => text.includes(keyword)) + ); + + return found ? 
found.category : null; +} \ No newline at end of file diff --git a/src/utils/DateUtils.ts b/src/utils/DateUtils.ts index 153a36d..c8e212b 100644 --- a/src/utils/DateUtils.ts +++ b/src/utils/DateUtils.ts @@ -1,5 +1,6 @@ import path from 'node:path'; import fs from 'node:fs'; +import { AppError } from '../errors/AppError'; /** * 한국 시간 ISO 형식 반환 @@ -16,10 +17,37 @@ export function getKoreaTimeISO(): string { ); } -export function parseDate(dateString: string, split: string = '-'): Date { - const [year, month, day] = dateString.split(split).map((num) => parseInt(num, 10)); - // JavaScript Date는 월이 0-11이므로 month - 1 - return new Date(year, month - 1, day); +export function parseKoreaDate(dateString: string, split: string = '-'): Date { + const parts = dateString.split(split); + + // 입력 검증 + if (parts.length !== 3) { + throw new AppError( + `잘못된 날짜 형식`, + 'parseKoreaDate', + undefined, + { dateString, split, partsCount: parts.length } + ); + } + + const [yearStr, monthStr, dayStr] = parts; + const year = parseInt(yearStr, 10); + const month = parseInt(monthStr, 10); + const day = parseInt(dayStr, 10); + + // NaN 체크 + if (isNaN(year) || isNaN(month) || isNaN(day)) { + throw new AppError( + `잘못된 날짜 형식`, + 'parseKoreaDate', + undefined, + { dateString, split, year: yearStr, month: monthStr, day: dayStr } + ); + } + + // 한국 시간대(UTC+9)에서 해당 날짜의 00:00:00을 나타내는 Date 객체 생성 + // 한국 시간 2024-06-15 00:00:00 = UTC 2024-06-14 15:00:00 + return new Date(`${year}-${month.toString().padStart(2, '0')}-${day.toString().padStart(2, '0')}T00:00:00+09:00`); } export const isEqualOrAfterDateOnly = (baseDate: Date, compareDate: Date | null): boolean => { @@ -32,3 +60,17 @@ export const isEqualOrAfterDateOnly = (baseDate: Date, compareDate: Date | null) return base >= compare; }; + +/** + * Date 객체를 한국시간 ISO 형식 문자열로 변환 + * @param date Date 객체 + * @returns YYYY-MM-DDTHH:mm:ss+09:00 형식의 한국 시간 문자열 + * @example formatKoreaDateISO(new Date()) => "2025-06-10T22:12:35+09:00" + */ +export function 
formatKoreaDateISO(date: Date): string { + return date + .toLocaleString('sv-SE', { + timeZone: 'Asia/Seoul', + }) + .replace(' ', 'T') + '+09:00'; +} diff --git a/test-lambda.sh b/test-lambda.sh index 3ce77b6..55d4f87 100755 --- a/test-lambda.sh +++ b/test-lambda.sh @@ -1,77 +1,160 @@ #!/bin/bash # =================================== -# AWS Lambda 로컬 테스트 스크립트 +# AWS Lambda 로컬 테스트 스크립트 (실시간 로그 포함) # =================================== # # 사용법: -# 1. docker-compose -f docker-compose.local.yml up --build -# 2. ./test-lambda.sh +# 1. ./test-lambda.sh # # 전제조건: # - Docker가 실행 중이어야 함 # - 포트 9000이 사용 가능해야 함 # =================================== -echo "🧪 Testing Lambda function with MinIO..." +echo "🧪 Testing Lambda function with MinIO (with real-time logs)..." echo "📍 Lambda URL: http://localhost:9000" echo "📍 MinIO Console: http://localhost:9002 (minioadmin/minioadmin)" echo "" +# Docker Compose 파일 경로 +COMPOSE_FILE="docker-compose.local.yml" + +# 코드 변경 감지 함수 +check_code_changes() { + local image_name="data-grab-lambda" + + # 소스 코드에서 가장 최근 수정된 파일의 시간 찾기 + local latest_source=$(find src/ -type f \( -name "*.ts" -o -name "*.js" -o -name "*.json" \) -exec stat -f "%m" {} \; 2>/dev/null | sort -nr | head -n1) + + # macOS에서 stat 명령어가 다를 수 있으므로 대안 사용 + if [ -z "$latest_source" ]; then + latest_source=$(find src/ -type f \( -name "*.ts" -o -name "*.js" -o -name "*.json" \) -exec stat -c "%Y" {} \; 2>/dev/null | sort -nr | head -n1) + fi + + # 여전히 값이 없다면 현재 시간 사용 (안전장치) + if [ -z "$latest_source" ]; then + echo "⚠️ Could not detect source file modification times, forcing rebuild" + return 0 + fi + + # Docker 이미지 존재 여부 및 생성 시간 확인 + local image_created=$(docker images --format "table {{.Repository}}:{{.Tag}}\t{{.CreatedAt}}" | grep "$image_name" | head -n1 | awk '{print $3" "$4" "$5}') + + if [ -z "$image_created" ]; then + echo "🔍 Docker image not found, need to build" + return 0 + fi + + # 이미지 생성 시간을 timestamp로 변환 + local image_timestamp=$(date -d "$image_created" +%s 
2>/dev/null) + + # macOS의 경우 date 명령어가 다름 + if [ -z "$image_timestamp" ]; then + image_timestamp=$(date -j -f "%Y-%m-%d %H:%M:%S" "$image_created" +%s 2>/dev/null) + fi + + if [ -z "$image_timestamp" ]; then + echo "⚠️ Could not parse image creation time, forcing rebuild" + return 0 + fi + + # 소스 코드가 이미지보다 최신인지 확인 + if [ "$latest_source" -gt "$image_timestamp" ]; then + echo "🔄 Source code is newer than Docker image, rebuilding..." + return 0 + else + echo "✅ Docker image is up to date with source code" + return 1 + fi +} + +# 컨테이너가 이미 실행 중인지 확인 +echo "🔍 Checking if containers are already running..." +containers_running=$(docker-compose -f $COMPOSE_FILE ps -q | wc -l | tr -d ' ') + +if [ "$containers_running" -gt 0 ]; then + echo "📦 Containers are already running" + + # 코드 변경 확인 + if check_code_changes; then + echo "🔄 Rebuilding containers with updated code..." + docker-compose -f $COMPOSE_FILE up --build -d + else + echo "✅ Using existing containers (no code changes detected)" + fi +else + echo "🚀 Starting containers..." + docker-compose -f $COMPOSE_FILE up --build -d +fi + +echo "⏳ Waiting for containers to be ready..." +sleep 10 + +# 실시간 로그 출력을 백그라운드에서 시작 +echo "📊 Starting real-time log monitoring..." +docker-compose -f $COMPOSE_FILE logs -f & +LOG_PID=$! + +# 함수 정의: 스크립트 종료 시 로그 프로세스도 종료 +cleanup() { + echo "" + echo "🛑 Stopping log monitoring..." + kill $LOG_PID 2>/dev/null + echo "🏁 Test completed!" + echo "" + echo "📁 Check results:" + echo " - MinIO Console: http://localhost:9002 (minioadmin/minioadmin)" + echo " - Local folder: ./output/" + echo "" + echo "🛑 To stop containers: docker-compose -f $COMPOSE_FILE down" +} + +# 스크립트 종료 시 cleanup 함수 실행 +trap cleanup EXIT + # Lambda 함수 테스트 URL LAMBDA_URL="http://localhost:9000/2015-03-31/functions/function/invocations" # 연결 테스트 +echo "" echo "🔍 Checking Lambda container status..." +for i in {1..30}; do + if curl -s "http://localhost:9000" > /dev/null 2>&1; then + echo "✅ Lambda container is ready!" 
+ break + fi + echo "⏳ Waiting for Lambda container... ($i/30)" + sleep 2 +done + if ! curl -s "http://localhost:9000" > /dev/null 2>&1; then - echo "❌ Lambda container is not running!" - echo "💡 Run: docker-compose -f docker-compose.local.yml up --build" + echo "❌ Lambda container failed to start!" + echo "💡 Check logs above for errors" exit 1 fi -echo "✅ Lambda container is running" -echo "" - -echo "📋 Test 1: Health Check (Empty Event)" -curl -XPOST "${LAMBDA_URL}" \ - -H "Content-Type: application/json" \ - -d '{}' | jq '.' 2>/dev/null || echo "" - -echo -e "\n============================================================\n" -echo "📋 Test 2: Single Job Crawling" -curl -XPOST "${LAMBDA_URL}" \ - -H "Content-Type: application/json" \ - -d '{ - "targetDate": "2025-06-13", - "jobName": "오정노인복지관" - }' | jq '.' 2>/dev/null || echo "" - -echo -e "\n============================================================\n" - -echo "📋 Test 3: All Jobs Crawling (May take longer)" -curl -XPOST "${LAMBDA_URL}" \ - -H "Content-Type: application/json" \ - -d '{ - "targetDate": "2024-01-15" - }' | jq '.' 2>/dev/null || echo "" +echo "" +echo "🔥 Starting tests..." +echo "============================================================" +echo "" -echo -e "\n============================================================\n" +echo "📋 Test: Single Job Crawling - 거모종합사회복지관" +echo "📤 Sending request..." +echo "" -echo "📋 Test 4: Invalid Date Format (Should Fail)" curl -XPOST "${LAMBDA_URL}" \ -H "Content-Type: application/json" \ -d '{ - "targetDate": "2024/01/15", - "jobName": "오정노인복지관" + "targetDate": "2025-07-18", + "jobName": "거모종합사회복지관" }' | jq '.' 2>/dev/null || echo "" -echo -e "\n============================================================\n" - -echo "🎉 All tests completed!" 
echo "" -echo "📁 Check results:" -echo " - MinIO Console: http://localhost:9002 (minioadmin/minioadmin)" -echo " - Local folder: ./output/" -echo " - Container logs: docker-compose -f docker-compose.local.yml logs lambda-crawler" +echo "============================================================" echo "" -echo "🛑 To stop containers: docker-compose -f docker-compose.local.yml down" \ No newline at end of file +echo "⏳ Test completed! Check the logs above for detailed execution info." +echo "🔍 Logs will continue to show for 10 more seconds..." +sleep 10 + +# cleanup 함수가 trap에 의해 자동으로 호출됩니다 \ No newline at end of file diff --git a/tests/utils/CategoryClassifier.test.ts b/tests/utils/CategoryClassifier.test.ts new file mode 100644 index 0000000..4997a0b --- /dev/null +++ b/tests/utils/CategoryClassifier.test.ts @@ -0,0 +1,25 @@ +import { classifyCategory } from '../../src/utils/CategoryClassifier'; +import { Category } from '../../src/entity/Category'; + +describe('CategoryClassifier', () => { + describe('classifyCategory', () => { + it('부분 문자열로 매칭된다', () => { + expect(classifyCategory('신입채용공고')).toBe(Category.RECRUIT); + expect(classifyCategory('경력채용')).toBe(Category.RECRUIT); + expect(classifyCategory('채용면접')).toBe(Category.RECRUIT); + }); + + it('null과 빈 문자열은 null을 반환한다', () => { + expect(classifyCategory('')).toBeNull(); + expect(classifyCategory(null as any)).toBeNull(); + expect(classifyCategory(undefined as any)).toBeNull(); + expect(classifyCategory(' ')).toBeNull(); // 공백만 있는 경우 + }); + + it('키워드가 없으면 null을 반환한다', () => { + expect(classifyCategory('일반 공지사항입니다')).toBeNull(); + expect(classifyCategory('시설 이용 안내')).toBeNull(); + expect(classifyCategory('연락처 변경 알림')).toBeNull(); + }); + }); +}); \ No newline at end of file diff --git a/tests/utils/DateUtils.test.ts b/tests/utils/DateUtils.test.ts index a711cc3..01c39af 100644 --- a/tests/utils/DateUtils.test.ts +++ b/tests/utils/DateUtils.test.ts @@ -1,4 +1,4 @@ -import { getKoreaTimeISO, parseDate, 
isEqualOrAfterDateOnly } from '../../src/utils/DateUtils'; +import { getKoreaTimeISO, parseKoreaDate, isEqualOrAfterDateOnly } from '../../src/utils/DateUtils'; describe('DateUtils', () => { describe('getKoreaTimeISO', () => { @@ -12,33 +12,92 @@ describe('DateUtils', () => { }); }); - describe('parseDate', () => { - it.each([ - { - name: '기본 구분자(-) 사용', - dateString: '2024-01-01', - separator: undefined, - expected: { - year: 2024, - month: 0, // 0-based month - date: 1 - } - }, - { - name: '커스텀 구분자(/) 사용', - dateString: '2024/01/01', - separator: '/', - expected: { - year: 2024, - month: 0, - date: 1 + describe('parseKoreaDate', () => { + describe('기본 파싱 동작', () => { + it.each([ + { + name: '기본 구분자(-) 사용', + dateString: '2024-01-15', + separator: undefined, + expected: { + year: 2024, + month: 0, // 0-based month + date: 15 + } + }, + { + name: '커스텀 구분자(/) 사용', + dateString: '2024/03/20', + separator: '/', + expected: { + year: 2024, + month: 2, + date: 20 + } } - } - ])('구분자를 사용해서 문자열을 파싱한다', ({ dateString, separator, expected }) => { - const result = parseDate(dateString, separator); - expect(result.getFullYear()).toBe(expected.year); - expect(result.getMonth()).toBe(expected.month); - expect(result.getDate()).toBe(expected.date); + ])('$name - 날짜를 올바르게 파싱한다', ({ dateString, separator, expected }) => { + const result = parseKoreaDate(dateString, separator); + + // 한국 시간대로 날짜 컴포넌트 확인 + const koreanDate = new Date(result.toLocaleString('en-US', { timeZone: 'Asia/Seoul' })); + + expect(koreanDate.getFullYear()).toBe(expected.year); + expect(koreanDate.getMonth()).toBe(expected.month); + expect(koreanDate.getDate()).toBe(expected.date); + }); + }); + + describe('한국 시간대 적용 검증', () => { + it('한국 시간대(+09:00)가 적용된 Date 객체를 반환한다', () => { + // given when + const result = parseKoreaDate('2024-06-15'); + //then + const isoString = result.toISOString(); + + expect(result).toBeInstanceOf(Date); + + // 한국 시간대로 날짜 컴포넌트 확인 + const koreanDate = new 
Date(result.toLocaleString('en-US', { timeZone: 'Asia/Seoul' })); + expect(koreanDate.getFullYear()).toBe(2024); + expect(koreanDate.getMonth()).toBe(5); // June (0-based) + expect(koreanDate.getDate()).toBe(15); + }); + + it('한국 시간대로 포맷된 날짜 문자열을 확인한다', () => { + const result = parseKoreaDate('2024-06-15'); + + // 한국 로케일로 포맷했을 때의 결과 확인 + const koreaDateString = result.toLocaleString('ko-KR', { + timeZone: 'Asia/Seoul', + year: 'numeric', + month: '2-digit', + day: '2-digit', + hour: '2-digit', + minute: '2-digit', + second: '2-digit' + }); + + // 최소한 올바른 날짜가 포함되어 있는지 확인 + expect(koreaDateString).toContain('2024'); + expect(koreaDateString).toContain('06'); + expect(koreaDateString).toContain('15'); + }); + }); + + describe('실제 시간대 동작 검증', () => { + it('UTC 시간과의 차이를 확인한다', () => { + // given when + const result = parseKoreaDate('2024-06-15'); + // then + // 같은 날짜의 UTC Date와 비교 + const utcDate = new Date(Date.UTC(2024, 5, 15)); // June 15, 2024 UTC + + // 시간 차이 확인 + const timeDiff = Math.abs(result.getTime() - utcDate.getTime()); + + const expectedDiff = 9 * 60 * 60 * 1000; // 9시간을 밀리초로 변환 + expect(timeDiff).toBe(expectedDiff); + }); }); });