Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/PRODUCT_READINESS.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,17 @@
+ `_validate_upload` 單元 + NFC)。全套 2410 passed。
- 註:`localization.py` 的 dub 上傳走 `tempfile`(OS 管理路徑,audit 評為低風險),本輪未動;
若要一併套白名單可開後續小 PR。
- ✅ 2026-06-15 **後續小 PR 完成(localization 檔案端點上傳硬化)**。`localization.py` 的 5 個
multipart 端點(`translate/image`、`translate/pdf`、`meeting/summarize`、`song/transcribe`、
`dub`)原先只把上傳寫進 `tempfile`(無 path-traversal 風險,故 S-3 評低風險),但缺 `/upload`
已有的「副檔名 + MIME 白名單」——任何人都能往這些端點塞非預期檔案。新增共用
`_validate_media_upload()`:**副檔名為強 gate**(per 端點媒體類別:image=圖片副檔名、pdf=`.pdf`、
meeting/song/dub=影音副檔名),**MIME 寬鬆輔助**(octet-stream/空字串放行=瀏覽器常見,只擋「有給
且明顯非該類」的大類,比照 S-4)。`dub` 走 url 來源不受影響(沒檔不驗)。檔案進 `mkstemp` 不用原
檔名,故只驗媒體類別、不另做檔名 sanitize(path 安全由 tempfile 保證)。補
`tests/test_localization_upload.py` 16 測(強 gate 擋 .exe/文件塞影音端點/無副檔名 + MIME 寬鬆
octet-stream/空放行·矛盾 MIME 擋下 + dub url 不驗·上傳驗 + 純函式單元,**全 mock 不打 API/不跑
ffmpeg**)。本機全套 2691 passed(剩 1 QR 像素為容器缺 Noto CJK 字型假象,CI 權威)。
- [x] 🟡 **S-5 secret 落地強化**(GATE→已拍板)— ✅ 2026-06-07 完成。**劉老師拍板:不加密**
(明文 + gitignore,自架單機可接受,Fernet 靜態加密過度設計、徒增金鑰管理負擔)。改為在
`SECURITY.md` 把機密檔處置講清楚:明文存放、**別放共享磁碟/雲端同步、別進未加密備份**(要備份
Expand Down
50 changes: 49 additions & 1 deletion server/routes/localization.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import os
import tempfile

from fastapi import APIRouter, Depends, File, Form, UploadFile
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
from pydantic import BaseModel, Field

from core.glossary import to_translation_rules
Expand Down Expand Up @@ -43,6 +43,49 @@ def _save_upload(upload: UploadFile, suffix: str = "") -> str:
return path


# ---------- S-4 (follow-up): hardening uploads on the file endpoints ----------
# The localization multipart endpoints used to write uploads straight into a
# tempfile (OS-managed path, no path-traversal risk, rated low risk under S-3),
# but they lacked the "extension + MIME whitelist" that uploads.py `/upload`
# already has, so anyone could push unexpected files into the
# image/pdf/meeting/song/dub endpoints. Following S-4: the extension is the
# **hard gate** (per endpoint media class) and the MIME type is a **lenient
# secondary check** (browsers often report octet-stream or an empty string, so
# those cannot be rejected outright; only a MIME that is present and clearly of
# another class is blocked). Files go through mkstemp without the original
# filename, so only the media class is validated and no filename sanitising is
# done (path safety is guaranteed by tempfile).
_IMAGE_EXTS = {
    ".bmp",
    ".gif",
    ".jpeg",
    ".jpg",
    ".png",
    ".tif",
    ".tiff",
    ".webp",
}
_PDF_EXTS = {".pdf"}
# Video / audio extensions (shared by the meeting, song and dub endpoints).
_AV_EXTS = {
    # video
    ".avi", ".m4v", ".mkv", ".mov", ".mp4", ".webm",
    # audio
    ".aac", ".flac", ".m4a", ".mp3", ".ogg", ".opus", ".wav",
}
_PDF_MIME = frozenset(
    (
        "application/pdf",
        "application/x-pdf",
        "application/acrobat",
    )
)


def _validate_media_upload(
    upload: UploadFile,
    allowed_exts: set[str],
    label: str,
    *,
    mime_prefixes: tuple[str, ...] = (),
    mime_exact: frozenset[str] = frozenset(),
) -> None:
    """S-4: whitelist an upload by extension (hard gate) and MIME (lenient check).

    Args:
        upload: The uploaded file; only ``filename`` and ``content_type`` are read.
        allowed_exts: Lower-case extensions (with the leading dot) to accept.
        label: Name of the media class, interpolated into the error messages.
        mime_prefixes: MIME prefixes that count as a match (e.g. ``"image/"``).
        mime_exact: Full MIME types that count as a match.

    Raises:
        HTTPException: 400 when the extension is not in ``allowed_exts``, or
            when a MIME type is given, is not ``application/octet-stream``, and
            matches neither ``mime_exact`` nor any of ``mime_prefixes``.
    """
    _, raw_ext = os.path.splitext(upload.filename or "")
    extension = raw_ext.lower()
    if extension not in allowed_exts:
        raise HTTPException(
            status.HTTP_400_BAD_REQUEST,
            f"不接受的副檔名 {extension or '(無)'}; {label} 只收 {sorted(allowed_exts)}",
        )

    # Keep only the media type itself: drop parameters such as "; charset=...".
    declared = upload.content_type or ""
    mime = declared.partition(";")[0].strip().lower()

    # A missing or generic MIME is common from browsers, so it is let through.
    if mime in ("", "application/octet-stream"):
        return

    accepted = mime in mime_exact or any(map(mime.startswith, mime_prefixes))
    if not accepted:
        raise HTTPException(
            status.HTTP_400_BAD_REQUEST,
            f"不接受的 MIME 類型: {mime}({label})",
        )


# ---------- 請求模型 ----------
class TranslateRequest(BaseModel):
text: str
Expand Down Expand Up @@ -234,6 +277,7 @@ async def translate_image(
source_lang: str = Form("auto"),
) -> dict:
"""圖片 OCR + 翻譯(pytesseract,lazy)。回最終翻譯文字。"""
_validate_media_upload(file, _IMAGE_EXTS, "圖片", mime_prefixes=("image/",))
path = _save_upload(file)
try:
result = await asyncio.to_thread(
Expand All @@ -253,6 +297,7 @@ async def translate_pdf(
source_lang: str = Form("en-US"),
) -> dict:
"""PDF 逐頁翻譯(PyMuPDF,lazy)。回最終彙整文字。"""
_validate_media_upload(file, _PDF_EXTS, "PDF", mime_exact=_PDF_MIME)
path = _save_upload(file, suffix=".pdf")
try:
result = await asyncio.to_thread(
Expand All @@ -275,6 +320,7 @@ async def meeting_summarize(

summary_types 以逗號分隔(如 'key_points,decisions')。
"""
_validate_media_upload(file, _AV_EXTS, "會議影音", mime_prefixes=("video/", "audio/"))
path = _save_upload(file)
types = [t.strip() for t in summary_types.split(",") if t.strip()]
try:
Expand Down Expand Up @@ -307,6 +353,7 @@ async def song_transcribe(
"""
from core.song_build import build_song_json_from_media

_validate_media_upload(file, _AV_EXTS, "歌曲影音", mime_prefixes=("video/", "audio/"))
suffix = os.path.splitext(file.filename or "")[1] or ".mp3"
path = _save_upload(file, suffix=suffix)
try:
Expand Down Expand Up @@ -338,6 +385,7 @@ async def dub_video(
if not url:
if file is None:
return {"error": "需提供 url 或上傳 file"}
_validate_media_upload(file, _AV_EXTS, "配音影片", mime_prefixes=("video/", "audio/"))
path = _save_upload(file, suffix=".mp4")
source = url or path
try:
Expand Down
203 changes: 203 additions & 0 deletions tests/test_localization_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
"""server.routes.localization 上傳硬化測試(S-4 後續:localization 檔案端點)。

驗收(比照 test_upload.py S-4 段):
- 5 個 multipart 端點(image/pdf/meeting/song/dub)都套**副檔名白名單**(強 gate)+
**MIME 寬鬆白名單**(輔助)。
- 合法媒體照常通過;明顯非該類別的檔案(如 .exe / 文件塞到影音端點)被擋 400。
- MIME 寬鬆:octet-stream / 空字串放行;明顯不符的 MIME(image 配 av 端點)擋下。
- dub 走 url 來源不受上傳硬化影響(沒有檔案就不驗)。
全程 monkeypatch 媒體/翻譯模組,不打真 API、不跑 ffmpeg/whisper。
"""
from __future__ import annotations

import pytest

pytest.importorskip("fastapi.testclient", reason="需要 fastapi 安裝")
pytest.importorskip("multipart", reason="server.main 內 upload route 需要")

from fastapi.testclient import TestClient

import core.translation.service as svc
import server.routes.localization as loc
from server.main import create_app


@pytest.fixture
def client(monkeypatch):
    """Yield a TestClient whose image/PDF translator entry points are faked."""

    def _canned(reply):
        # Same 3-argument signature as the original fakes; yields one chunk.
        return lambda path, t, s: iter([reply])

    # Intercept every point that would hit real media/Gemini, so a valid
    # upload reaches these fakes instead of an external dependency.
    monkeypatch.setattr(svc.translator, "translate_image", _canned("IMG_OK"))
    monkeypatch.setattr(svc.translator, "translate_pdf", _canned("PDF_OK"))

    with TestClient(create_app()) as test_client:
        yield test_client


# ---------- 副檔名強 gate(擋非該類別檔案)----------
class TestExtensionGate:
    """The extension whitelist is the hard gate: off-category files get HTTP 400."""

    def test_image_rejects_non_image_ext(self, client):
        upload = {"file": ("evil.exe", b"MZ", "application/octet-stream")}
        resp = client.post(
            "/localization/translate/image",
            files=upload,
            data={"target_lang": "zh-TW"},
        )
        assert resp.status_code == 400
        assert "副檔名" in resp.json()["detail"]

    def test_image_accepts_png(self, client):
        upload = {"file": ("photo.png", b"\x89PNG", "image/png")}
        resp = client.post(
            "/localization/translate/image",
            files=upload,
            data={"target_lang": "zh-TW"},
        )
        assert resp.status_code == 200
        assert resp.json()["result"] == "IMG_OK"

    def test_pdf_rejects_non_pdf_ext(self, client):
        upload = {"file": ("notes.txt", b"hi", "text/plain")}
        resp = client.post(
            "/localization/translate/pdf",
            files=upload,
            data={"target_lang": "zh-TW"},
        )
        assert resp.status_code == 400
        assert "副檔名" in resp.json()["detail"]

    def test_pdf_accepts_pdf(self, client):
        upload = {"file": ("doc.pdf", b"%PDF-1.4", "application/pdf")}
        resp = client.post(
            "/localization/translate/pdf",
            files=upload,
            data={"target_lang": "zh-TW"},
        )
        assert resp.status_code == 200
        assert resp.json()["result"] == "PDF_OK"

    def test_meeting_rejects_document(self, client):
        # A document pushed at an audio/video endpoint must be blocked.
        upload = {"file": ("m.pdf", b"%PDF", "application/pdf")}
        resp = client.post(
            "/localization/meeting/summarize",
            files=upload,
            data={"language": "zh-TW"},
        )
        assert resp.status_code == 400
        assert "副檔名" in resp.json()["detail"]

    def test_song_rejects_image(self, client):
        upload = {"file": ("cover.png", b"\x89PNG", "image/png")}
        resp = client.post(
            "/localization/song/transcribe",
            files=upload,
            data={"song_title": "x"},
        )
        assert resp.status_code == 400
        assert "副檔名" in resp.json()["detail"]

    def test_missing_filename_rejected(self, client):
        # A filename without any extension never passes the hard gate,
        # even when the declared MIME type looks right.
        upload = {"file": ("noext", b"data", "image/png")}
        resp = client.post(
            "/localization/translate/image",
            files=upload,
            data={"target_lang": "zh-TW"},
        )
        assert resp.status_code == 400
        assert "(無)" in resp.json()["detail"]


# ---------- MIME 寬鬆輔助 ----------
class TestMimeLenient:
    """MIME is a lenient secondary check applied after the extension gate."""

    def test_octet_stream_passes(self, client):
        # Valid extension + the generic octet-stream MIME browsers often send.
        upload = {"file": ("photo.jpg", b"\xff\xd8", "application/octet-stream")}
        resp = client.post(
            "/localization/translate/image",
            files=upload,
            data={"target_lang": "zh-TW"},
        )
        assert resp.status_code == 200

    def test_empty_mime_passes(self, client):
        upload = {"file": ("photo.jpg", b"\xff\xd8", "")}
        resp = client.post(
            "/localization/translate/image",
            files=upload,
            data={"target_lang": "zh-TW"},
        )
        assert resp.status_code == 200

    def test_wrong_mime_for_ext_rejected(self, client):
        # Extension says .jpg but the MIME explicitly says video: contradiction.
        upload = {"file": ("photo.jpg", b"\xff\xd8", "video/mp4")}
        resp = client.post(
            "/localization/translate/image",
            files=upload,
            data={"target_lang": "zh-TW"},
        )
        assert resp.status_code == 400
        assert "MIME" in resp.json()["detail"]

    def test_av_endpoint_accepts_audio_mime(self, client, monkeypatch):
        import core.meeting.summarizer as msum
        from core.meeting.summarizer import MeetingSummaryResult

        def fake_process_video(*args, **kwargs):
            # Stand-in for the real summarizer so no media processing runs.
            return MeetingSummaryResult(
                transcript="t",
                transcript_with_time="t",
                summary={"full_summary": "s"},
                duration=1.0,
                language="zh",
            )

        monkeypatch.setattr(msum.meeting_summarizer, "process_video", fake_process_video)

        upload = {"file": ("rec.mp3", b"ID3", "audio/mpeg")}
        resp = client.post(
            "/localization/meeting/summarize",
            files=upload,
            data={"language": "zh-TW"},
        )
        assert resp.status_code == 200
        assert resp.json()["summary"] == {"full_summary": "s"}


# ---------- dub:url 來源不受上傳硬化影響 ----------
class TestDubSource:
    """dub: a url source skips upload validation; an uploaded file is validated."""

    @staticmethod
    def _install_fake_dubber(monkeypatch):
        """Swap ``loc.get_video_dubber`` for a factory of a canned dubber."""

        class _FakeDubber:
            def process_video(self, source, src, tgt, burn_subtitles=False):
                return {"dubbed_video": "/tmp/out.mp4"}

        # Calling the class yields a fresh instance, like the original factory.
        monkeypatch.setattr(loc, "get_video_dubber", _FakeDubber)

    def test_dub_url_skips_upload_validation(self, client, monkeypatch):
        self._install_fake_dubber(monkeypatch)
        form = {"url": "https://youtu.be/x", "target_lang": "zh-TW"}
        resp = client.post("/localization/dub", data=form)
        assert resp.status_code == 200
        assert resp.json()["results"]["dubbed_video"] == "/tmp/out.mp4"

    def test_dub_rejects_non_av_upload(self, client):
        upload = {"file": ("x.exe", b"MZ", "application/octet-stream")}
        resp = client.post(
            "/localization/dub",
            files=upload,
            data={"target_lang": "zh-TW"},
        )
        assert resp.status_code == 400
        assert "副檔名" in resp.json()["detail"]

    def test_dub_accepts_mp4_upload(self, client, monkeypatch):
        self._install_fake_dubber(monkeypatch)
        upload = {"file": ("clip.mp4", b"\x00\x00", "video/mp4")}
        resp = client.post(
            "/localization/dub",
            files=upload,
            data={"target_lang": "zh-TW"},
        )
        assert resp.status_code == 200


# ---------- 純函式單元 ----------
class TestValidatorUnit:
    """Direct unit tests of ``loc._validate_media_upload`` without the HTTP layer."""

    def test_rejects_bad_ext(self):
        from io import BytesIO

        from fastapi import HTTPException, UploadFile

        bogus = UploadFile(filename="x.exe", file=BytesIO(b""))
        with pytest.raises(HTTPException) as caught:
            loc._validate_media_upload(
                bogus,
                loc._AV_EXTS,
                "影音",
                mime_prefixes=("video/", "audio/"),
            )
        assert caught.value.status_code == 400

    def test_accepts_good_ext_and_prefix(self):
        from io import BytesIO

        from fastapi import UploadFile

        audio = UploadFile(
            filename="a.mp3",
            file=BytesIO(b""),
            headers={"content-type": "audio/mpeg"},
        )
        # Must return without raising.
        loc._validate_media_upload(
            audio,
            loc._AV_EXTS,
            "影音",
            mime_prefixes=("video/", "audio/"),
        )
Loading