index-tts-vllm/api_example_v2.py at master · Ksuriuri/index-tts-vllm · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from dataclasses import asdict, dataclass
import os
from typing import List, Optional
import requests

SERVER_PORT = 6006
output_dir = "outputs"
os.makedirs(output_dir, exist_ok=True)

url = F"http://0.0.0.0:{SERVER_PORT}/tts_url"


@dataclass
class IndexTTS2RequestData:
    text: str
    spk_audio_path: str
    emo_control_method: int = 0
    emo_ref_path: Optional[str] = None
    emo_weight: float = 1.0
    emo_vec: List[float] = None
    emo_text: Optional[str] = None
    emo_random: bool = False
    max_text_tokens_per_sentence: int = 120

    def __post_init__(self):
        # 保证 emo_vec 默认长度为 8 的 0 向量
        if self.emo_vec is None:
            self.emo_vec = [0.0] * 8

    def to_dict(self) -> str:
        return asdict(self)


# 1. 情感与音色参考音频相同
data = IndexTTS2RequestData(
    text="还是会想你，还是想登你",
    spk_audio_path="assets/jay_promptvn.wav"
)

response = requests.post(url, json=data.to_dict())
with open(os.path.join(output_dir, "output1.wav"), "wb") as f:
    f.write(response.content)


# 2. 使用情感参考音频
data = IndexTTS2RequestData(
    text="还是会想你，还是想登你",
    spk_audio_path="assets/jay_promptvn.wav",
    emo_control_method=1,
    emo_ref_path="assets/vo_card_klee_endOfGame_fail_01.wav",
    emo_weight=0.6
)

response = requests.post(url, json=data.to_dict())
with open(os.path.join(output_dir, "output2.wav"), "wb") as f:
    f.write(response.content)


# 3. 使用情感向量控制
# ["喜", "怒", "哀", "惧", "厌恶", "低落", "惊喜", "平静"]
emo_vec = [0, 0, 0.55, 0, 0, 0, 0, 0]

data = IndexTTS2RequestData(
    text="还是会想你，还是想登你",
    spk_audio_path="assets/jay_promptvn.wav",
    emo_control_method=2,
    emo_vec=emo_vec
)

response = requests.post(url, json=data.to_dict())
with open(os.path.join(output_dir, "output3.wav"), "wb") as f:
    f.write(response.content)


# 4. 使用情感描述文本控制
data = IndexTTS2RequestData(
    text="还是会想你，还是想登你",
    spk_audio_path="assets/jay_promptvn.wav",
    emo_control_method=3,
    emo_text="极度悲伤"
)

response = requests.post(url, json=data.to_dict())
with open(os.path.join(output_dir, "output4.wav"), "wb") as f:
    f.write(response.content)