-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvoice.py
More file actions
65 lines (51 loc) · 2.36 KB
/
voice.py
File metadata and controls
65 lines (51 loc) · 2.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from melo.api import TTS
import os
import json
import torch
import argparse
from openvoice.api import ToneColorConverter
# 명령어 인자 파싱
parser = argparse.ArgumentParser(description="Voice conversion using TTS and ToneColorConverter")
parser.add_argument("--text", type=str, required=True, help="Input text to be converted to speech")
parser.add_argument("--vector", type=str, required=True, help="Target tone vector for conversion (comma-separated)")
parser.add_argument("--output", type=str, required=True, help="Output file path to save the converted voice")
args = parser.parse_args()
# 입력 데이터 설정
text = args.text
vector_str = args.vector # 문자열로 전달된 벡터
try:
# JSON 문자열을 리스트로 변환
vector_list = json.loads(vector_str)
# torch.Tensor로 변환
vector_tensor = torch.tensor(vector_list, device="cuda:0" if torch.cuda.is_available() else "cpu")
except json.JSONDecodeError:
raise ValueError(f"Invalid vector format: {vector_str}")
output_path = args.output
output_dir = os.path.dirname(output_path)
os.makedirs(output_dir, exist_ok=True) # 출력 폴더 생성
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# 텍스트를 음성으로 변환한 결과를 저장하기 위한 임시 파일 경로
src_path = os.path.join(output_dir, 'tmp.wav')
ckpt_converter = 'checkpoints_v2/converter'
tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
# 속도 조정
speed = 1.0
model = TTS(language='KR', device=device)
speaker_ids = model.hps.data.spk2id
for speaker_key in speaker_ids.keys():
speaker_id = speaker_ids[speaker_key]
speaker_key = speaker_key.lower().replace('_', '-')
source_se = torch.load(f'checkpoints_v2/base_speakers/ses/{speaker_key}.pth', map_location=device)
model.tts_to_file(text, speaker_id, src_path, speed=speed)
save_path = f'{output_dir}/output_v2_news.wav'
# Run the tone color converter (생성된 음성의 음색을 변환)
encode_message = "@MyShell"
tone_color_converter.convert(
audio_src_path=src_path,
src_se=source_se,
tgt_se=vector_tensor,
output_path=output_path,
message=encode_message)
result = {"status": "success", "output_file": output_path}
print(json.dumps(result))