欢迎光临散文网 会员登录 & 注册

手把手教你使用RVC自带的Edge-TTS,实现文本转模型语音

2023-08-02 15:30 作者:还是辣个没头脑  | 我要投稿

tts.py:(可直接复制到记事本中)

import asyncio

import edge_tts

import sys


# Positional command-line arguments, in order: text to speak, Edge-TTS voice
# name, rate and volume. Any argument that is not supplied falls back to the
# default paired with it below.
TEXT, VOICE, rate, volume = (
    sys.argv[position] if len(sys.argv) > position else fallback
    for position, fallback in enumerate(
        ("Hello World!", "en-GB-SoniaNeural", "+0%", "+0%"), start=1
    )
)

# File the synthesized audio is saved to (relative to the working directory).
OUTPUT_FILE = "abc.wav"


async def _main() -> None:
    """Synthesize TEXT with the VOICE voice and save the audio to OUTPUT_FILE.

    The module-level ``rate`` and ``volume`` strings are echoed to stdout and
    then forwarded to Edge-TTS.
    """
    print(rate+" "+ volume)

    # rate/volume are passed by keyword: edge-tts declares them keyword-only
    # in its Communicate constructor, so positional passing raises TypeError
    # there, while keywords work wherever the parameters carry these names.
    communicate = edge_tts.Communicate(TEXT, VOICE, rate=rate, volume=volume)

    await communicate.save(OUTPUT_FILE)


if __name__ == "__main__":
    # asyncio.run() creates a fresh event loop, runs _main() to completion and
    # closes the loop afterwards. It replaces the manual
    # get_event_loop()/run_until_complete()/close() sequence, which is
    # deprecated when no event loop is running.
    asyncio.run(_main())



#TTS功能需要的参数 :(自己注意下排版对齐)

                        text_input = gr.Textbox(label = "在此输入需要转译的文字(建议打开自动f0预测)",)

                        tts_spk = gr.Dropdown(label = "选择原始音频音色(来自微软TTS)", choices=["zh-CN-XiaoyiNeural", "zh-CN-YunxiNeural", "zh-CN-liaoning-XiaobeiNeural", "zh-CN-shaanxi-XiaoniNeural", "zh-HK-HiuMaanNeural", "zh-HK-WanLungNeural", "ja-JP-NanamiNeural", "ja-JP-KeitaNeural"], value = "zh-CN-XiaoyiNeural")

                        tts_rate = gr.Number(

                            label=i18n("TTS变速"), value=0

                        )

                        tts_volume = gr.Number(

                            label=i18n("TTS变调"), value=0

                        )

#but00按钮创建


 but00 = gr.Button(i18n("转换TTS"), variant="primary")


#but00按钮事件

                    but00.click(

                        tts_fn,

                        [

                            text_input,

                            tts_spk,

                            tts_rate,

                            tts_volume,

                            sid0,

                            input_audio0,

                            vc_transform0,

                            f0_file,

                            f0method0,

                            file_index1,

                            file_index2,

                            # file_big_npy1,

                            index_rate1,

                            filter_radius0,

                            resample_sr0,

                            rms_mix_rate0,

                            protect0,

                        ],

                        [vc_output1, vc_output2],api_name="vc_tts"

                    )


tts_fn():

def tts_fn(
    _text,
    _speaker,
    tts_rate,
    tts_volume,
    sid,
    input_audio_path,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    file_index2,
    # file_big_npy,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
):
    """Synthesize text with Edge-TTS, then run voice conversion on the result.

    Steps: load the model named by ``sid`` via ``get_vc_self``, run ``tts.py``
    in a subprocess to produce ``abc.wav`` in the working directory, rewrite
    that file as 44.1 kHz 16-bit PCM, and pass it to ``vc_single``.

    Args:
        _text: Text to synthesize.
        _speaker: Edge-TTS voice name forwarded to ``tts.py``.
        tts_rate: Rate change as a number; converted to a signed percent
            string such as ``"+0%"`` or ``"-5%"``. ``None`` is treated as 0.
        tts_volume: Volume change as a number, converted the same way.
        sid: Model identifier forwarded to ``get_vc_self``.
        input_audio_path: Not used by this function; the generated TTS file
            is always the conversion input.
        f0_up_key, f0_file, f0_method, file_index, file_index2, index_rate,
            filter_radius, resample_sr, rms_mix_rate, protect: Forwarded
            unchanged to ``vc_single``.

    Returns:
        Whatever ``vc_single`` returns.

    Raises:
        subprocess.CalledProcessError: If ``tts.py`` exits with a non-zero
            status, so a stale ``abc.wav`` from an earlier run is never
            converted by mistake.
    """
    import os

    # Switch to the requested model.
    get_vc_self(sid)

    # Build signed percent strings ("+0%", "+10%", "-5%"). A cleared Number
    # field arrives as None, which is treated as "no change".
    tts_r = "%+d%%" % int(tts_rate or 0)
    tts_v = "%+d%%" % int(tts_volume or 0)

    # Run the TTS script; it writes abc.wav into the current working directory.
    subprocess.run(
        [r"runtime\python.exe", "tts.py", _text, _speaker, tts_r, tts_v],
        check=True,
    )

    # Rewrite the TTS output as 44.1 kHz 16-bit PCM. Loading directly at the
    # target rate avoids an intermediate resample to librosa's default rate.
    sr_44100 = 44100
    resampled_y, _ = librosa.load("abc.wav", sr=sr_44100)
    sf.write("abc.wav", resampled_y, sr_44100, subtype="PCM_16")

    # Absolute path of the generated file, resolved against the working
    # directory instead of a machine-specific hard-coded location.
    input_audio = os.path.abspath("abc.wav")

    # Run the voice conversion on the generated wav. The first argument is
    # fixed at 0 as in the original - presumably the speaker id; verify
    # against vc_single.
    info = vc_single(
        0,
        input_audio,
        f0_up_key,
        f0_file,
        f0_method,
        file_index,
        file_index2,
        # file_big_npy,
        index_rate,
        filter_radius,
        resample_sr,
        rms_mix_rate,
        protect,
    )

    # The result must be returned so the click handler's outputs are filled.
    return info




get_vc_self():

# 一个选项卡全局只能有一个音色

def get_vc_self(sid):

    """Load the model checkpoint named by ``sid`` into module globals, or unload it.

    When ``sid`` is ``""`` or ``[]`` the currently loaded state is released
    (only if ``hubert_model`` is not None) and a dict
    ``{"visible": False, "__type__": "update"}`` is returned.

    Otherwise the checkpoint ``weight_root/sid`` is loaded on the CPU, a
    synthesizer is built according to the checkpoint's ``version`` and ``f0``
    entries, its weights are loaded, and the globals ``cpt``, ``tgt_sr``,
    ``version``, ``net_g``, ``vc`` and ``n_spk`` are set. This path returns
    None: the tuple return at the end is commented out.
    """

    global n_spk, tgt_sr, net_g, vc, cpt, version

    if sid == "" or sid == []:

        global hubert_model

        if hubert_model is not None:  # Because of polling, check whether sid switched from a model to no model

            print("clean_empty_cache")

            del net_g, n_spk, vc, hubert_model, tgt_sr  # ,cpt

            # NOTE(review): hubert_model appears twice in this chain; harmless.
            hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None

            if torch.cuda.is_available():

                torch.cuda.empty_cache()

            ### Without the rebuild-and-delete steps below the cleanup is not complete

            if_f0 = cpt.get("f0", 1)

            version = cpt.get("version", "v1")

            if version == "v1":

                if if_f0 == 1:

                    net_g = SynthesizerTrnMs256NSFsid(

                        *cpt["config"], is_half=config.is_half

                    )

                else:

                    net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])

            elif version == "v2":

                if if_f0 == 1:

                    net_g = SynthesizerTrnMs768NSFsid(

                        *cpt["config"], is_half=config.is_half

                    )

                else:

                    net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])

            # NOTE(review): only cpt is rebound (to None) below, so the global
            # net_g stays unbound after this branch until a model is loaded.
            del net_g, cpt

            if torch.cuda.is_available():

                torch.cuda.empty_cache()

            cpt = None

        return {"visible": False, "__type__": "update"}

    person = "%s/%s" % (weight_root, sid)

    print("loading %s" % person)

    # NOTE(review): torch.load unpickles the file; only load checkpoints from trusted sources.
    cpt = torch.load(person, map_location="cpu")

    # The last config entry is used as the target sample rate.
    tgt_sr = cpt["config"][-1]

    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk

    if_f0 = cpt.get("f0", 1)

    # Disabled: building the "protect" UI-update dicts (see the disabled return at the end).

    #if if_f0 == 0:

    #    to_return_protect0 = to_return_protect1 = {

    #        "visible": False,

    #        "value": 0.5,

    #        "__type__": "update",

    #    }

    #else:

    #    to_return_protect0 = {

    #        "visible": True,

    #        "value": to_return_protect0,

    #        "__type__": "update",

    #    }

    #    to_return_protect1 = {

    #        "visible": True,

    #        "value": to_return_protect1,

    #        "__type__": "update",

    #    }

    version = cpt.get("version", "v1")

    # Pick the synthesizer class from the checkpoint version and its f0 flag.
    # NOTE(review): for any other version value net_g is not reassigned here.
    if version == "v1":

        if if_f0 == 1:

            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)

        else:

            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])

    elif version == "v2":

        if if_f0 == 1:

            net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)

        else:

            net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])

    # enc_q is removed before the weights are loaded; strict=False tolerates
    # key mismatches, and the load result is printed.
    del net_g.enc_q

    print(net_g.load_state_dict(cpt["weight"], strict=False))

    net_g.eval().to(config.device)

    # Match the model precision to the configured half/full setting.
    if config.is_half:

        net_g = net_g.half()

    else:

        net_g = net_g.float()

    vc = VC(tgt_sr, config)

    n_spk = cpt["config"][-3]

    # Disabled: UI-update return values; the function returns None on this path.

    #return (

    #    {"visible": True, "maximum": n_spk, "__type__": "update"},

    #    to_return_protect0,

    #    to_return_protect1,

    #)


手把手教你使用RVC自带的Edge-TTS,实现文本转模型语音的评论 (共 条)

分享到微博请遵守国家法律