手把手教你使用RVC自带的Edge-TTS,实现文本转模型语音

tts.py:(可直接复制到记事本中,保存为 tts.py 并放在 RVC 根目录下,与 runtime 文件夹同级)
import asyncio
import edge_tts
import sys
def _cli_arg(position, fallback):
    """Return sys.argv[position] when it was supplied, otherwise fallback."""
    try:
        return sys.argv[position]
    except IndexError:
        return fallback


# Command line: tts.py [text] [voice] [rate] [volume]
# Every argument is optional and positional; missing ones use the defaults below.
TEXT = _cli_arg(1, "Hello World!")
VOICE = _cli_arg(2, "en-GB-SoniaNeural")
rate = _cli_arg(3, "+0%")
volume = _cli_arg(4, "+0%")
# The synthesized audio is always written to this file in the working directory.
OUTPUT_FILE = "abc.wav"
async def _main() -> None:
    """Synthesize TEXT with the Edge-TTS voice VOICE and save it to OUTPUT_FILE.

    Reads the module-level TEXT, VOICE, rate and volume values (taken from the
    command line) and echoes the rate/volume pair to stdout before synthesizing.
    """
    print(rate + " " + volume)
    # rate/volume are passed by keyword so the call does not depend on the
    # positional order of Communicate's parameters.
    # NOTE(review): newer edge-tts releases appear to make these keyword-only,
    # in which case the positional form raises TypeError -- confirm against
    # the bundled edge-tts version.
    communicate = edge_tts.Communicate(TEXT, VOICE, rate=rate, volume=volume)
    await communicate.save(OUTPUT_FILE)
if __name__ == "__main__":
    # asyncio.run creates a fresh event loop, runs _main() to completion and
    # closes the loop afterwards (also on error). It replaces the manual
    # get_event_loop()/run_until_complete()/close() sequence, which is
    # deprecated when no loop is running.
    asyncio.run(_main())
# Input widgets required by the TTS feature (restore the indentation yourself
# when pasting this into the UI definition).
text_input = gr.Textbox(label="在此输入需要转译的文字(建议打开自动f0预测)")
# Edge-TTS voice used to synthesize the source audio before voice conversion.
tts_spk = gr.Dropdown(
    label="选择原始音频音色(来自微软TTS)",
    choices=[
        "zh-CN-XiaoyiNeural",
        "zh-CN-YunxiNeural",
        "zh-CN-liaoning-XiaobeiNeural",
        "zh-CN-shaanxi-XiaoniNeural",
        "zh-HK-HiuMaanNeural",
        "zh-HK-WanLungNeural",
        "ja-JP-NanamiNeural",
        "ja-JP-KeitaNeural",
    ],
    value="zh-CN-XiaoyiNeural",
)
# Speaking-rate offset in percent; tts_fn turns it into a string such as "+10%".
tts_rate = gr.Number(label=i18n("TTS变速"), value=0)
# Volume offset in percent. This value is forwarded as the Edge-TTS *volume*
# argument, so the label says "volume" ("TTS音量") rather than the previous,
# misleading "pitch shift" ("TTS变调").
tts_volume = gr.Number(label=i18n("TTS音量"), value=0)
# Create the "convert TTS" button (but00).
but00 = gr.Button(i18n("转换TTS"), variant="primary")
# Click handler for but00: the inputs are passed to tts_fn positionally, so
# their order must match tts_fn's parameter order exactly.
but00.click(
    fn=tts_fn,
    inputs=[
        # TTS-specific inputs
        text_input,
        tts_spk,
        tts_rate,
        tts_volume,
        # voice-conversion inputs
        sid0,
        input_audio0,
        vc_transform0,
        f0_file,
        f0method0,
        file_index1,
        file_index2,
        # file_big_npy1 is intentionally not passed
        index_rate1,
        filter_radius0,
        resample_sr0,
        rms_mix_rate0,
        protect0,
    ],
    outputs=[vc_output1, vc_output2],
    api_name="vc_tts",
)
tts_fn():
def tts_fn(
    _text, _speaker, tts_rate, tts_volume,
    sid,
    input_audio_path,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    file_index2,
    # file_big_npy,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
):
    """Synthesize text with Edge-TTS, then run the result through vc_single.

    Args:
        _text: Text to synthesize.
        _speaker: Edge-TTS voice name (e.g. "zh-CN-XiaoyiNeural").
        tts_rate: Speaking-rate offset in percent; truncated to an int.
        tts_volume: Volume offset in percent; truncated to an int.
        sid: Model selection handed to get_vc_self to switch the loaded model.
        input_audio_path: Unused; the freshly generated TTS file is converted
            instead. Kept so the signature matches the Gradio input list.
        f0_up_key, f0_file, f0_method, file_index, file_index2, index_rate,
        filter_radius, resample_sr, rms_mix_rate, protect: Forwarded unchanged
            to vc_single.

    Returns:
        Whatever vc_single returns. It must be returned because the click
        handler maps it onto the Gradio outputs.

    Raises:
        subprocess.CalledProcessError: If the tts.py helper exits with a
            non-zero status, so a stale abc.wav is never converted silently.
    """
    import os  # local import: only needed to resolve the output path

    # Switch to the requested voice model.
    get_vc_self(sid)

    # Edge-TTS expects signed percentages such as "+0%", "+10%" or "-5%".
    # A cleared number box presumably arrives as None; treat it as 0.
    rate_arg = "%+d%%" % int(tts_rate or 0)
    volume_arg = "%+d%%" % int(tts_volume or 0)

    # Run the TTS helper script with the bundled interpreter. Arguments are
    # passed as a list (no shell), so the user text needs no escaping.
    subprocess.run(
        [r"runtime\python.exe", "tts.py", _text, _speaker, rate_arg, volume_arg],
        check=True,
    )

    # tts.py writes abc.wav into the current working directory. Resolve it
    # from there instead of hard-coding one machine's install path.
    tts_wav = os.path.abspath("abc.wav")

    # Re-encode the TTS output as 44.1 kHz 16-bit PCM. Loading directly at the
    # target rate avoids resampling twice (librosa's default rate, then 44100).
    sr_44100 = 44100
    resampled_y, _ = librosa.load(tts_wav, sr=sr_44100)
    sf.write(tts_wav, resampled_y, sr_44100, subtype="PCM_16")

    # Convert the synthesized wav with the loaded voice model.
    info = vc_single(
        0,
        tts_wav,
        f0_up_key,
        f0_file,
        f0_method,
        file_index,
        file_index2,
        # file_big_npy,
        index_rate,
        filter_radius,
        resample_sr,
        rms_mix_rate,
        protect,
    )
    # The result must be returned so Gradio can display it.
    return info
get_vc_self():
def _build_synthesizer(checkpoint):
    """Instantiate (without loading weights) the synthesizer for a checkpoint.

    The class is chosen from the checkpoint's "version" ("v1" by default) and
    "f0" flag (1 by default). Returns None for any other version value.
    """
    model_version = checkpoint.get("version", "v1")
    has_f0 = checkpoint.get("f0", 1) == 1
    model_args = checkpoint["config"]
    if model_version == "v1":
        if has_f0:
            return SynthesizerTrnMs256NSFsid(*model_args, is_half=config.is_half)
        return SynthesizerTrnMs256NSFsid_nono(*model_args)
    if model_version == "v2":
        if has_f0:
            return SynthesizerTrnMs768NSFsid(*model_args, is_half=config.is_half)
        return SynthesizerTrnMs768NSFsid_nono(*model_args)
    return None


# Only one voice model can be loaded globally per tab.
def get_vc_self(sid):
    """Load the voice model named by sid into the module-level globals.

    Args:
        sid: File name of the model weights under weight_root. An empty string
            or empty list means "no model": any loaded model is released.

    Returns:
        A Gradio update dict hiding the component when sid is empty; None
        after a model has been loaded (tts_fn ignores the return value, so the
        UI-update tuples that used to be sketched here in comments were dropped).

    Raises:
        ValueError: If the checkpoint declares a version other than "v1"/"v2".
    """
    global n_spk, tgt_sr, net_g, vc, cpt, version, hubert_model

    if sid == "" or sid == []:
        # Because of polling, only clean up when a model was actually in use,
        # i.e. sid switched from "some model" to "no model".
        if hubert_model is not None:
            print("clean_empty_cache")
            del net_g, n_spk, vc, hubert_model, tgt_sr  # ,cpt
            hubert_model = net_g = n_spk = vc = tgt_sr = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # Per the original author's note, memory is not fully released
            # unless the network is rebuilt once more and then deleted.
            version = cpt.get("version", "v1")
            net_g = _build_synthesizer(cpt)
            del net_g, cpt
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            cpt = None
        return {"visible": False, "__type__": "update"}

    person = "%s/%s" % (weight_root, sid)
    print("loading %s" % person)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    cpt = torch.load(person, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    version = cpt.get("version", "v1")

    model = _build_synthesizer(cpt)
    if model is None:
        # Previously an unknown version fell through and the code went on to
        # use whatever net_g happened to be bound (or crashed on an unbound
        # name); fail with a clear message instead.
        raise ValueError("unsupported model version: %r" % (version,))
    net_g = model

    # enc_q is deleted before the weights are loaded (non-strict load).
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g.eval().to(config.device)
    net_g = net_g.half() if config.is_half else net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt["config"][-3]