| import torch |
| import os |
|
|
| import gradio as gr |
| from transformers import pipeline |
|
|
| from pyChatGPT import ChatGPT |
|
|
| from speechbrain.pretrained import Tacotron2 |
| from speechbrain.pretrained import HIFIGAN |
|
|
| import json |
| import soundfile as sf |
|
|
|
|
# Run every model on the first GPU when one is present, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

print(f"Is CUDA available: {torch.cuda.is_available()}")
# Asking for the current CUDA device raises on a machine without CUDA, so
# the device name is only reported when a GPU is actually available.
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
|
|
| |
# Speech -> text: automatic-speech-recognition pipeline backed by the
# "openai/whisper-base.en" checkpoint, placed on `device`.
# chunk_length_s=30 asks the pipeline to work on 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
    chunk_length_s=30,
    device=device,
)
|
|
| |
# Text -> response: chatGPT client authenticated with the token stored in the
# "SessionToken" environment variable (os.environ.get yields None when unset).
session_token = os.environ.get("SessionToken")
api = ChatGPT(session_token=session_token)
|
|
| |
# Response -> speech: Tacotron2 turns text into a mel spectrogram and the
# HiFi-GAN vocoder turns that spectrogram into a waveform.
tacotron2 = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="tmpdir_tts",
    # Raise the decoder step limit so long chatGPT replies are not cut short.
    overrides={"max_decoder_steps": 10000},
    run_opts={"device": device},
)
# Load the vocoder on the same device as Tacotron2 so the spectrogram does not
# have to be moved to the CPU and vocoded there when a GPU is available.
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="tmpdir_vocoder",
    run_opts={"device": device},
)
|
|
|
|
def get_response_from_chatbot(text, reset_conversation):
    """Send *text* to chatGPT and return the text of its reply.

    Args:
        text: The user message to send.
        reset_conversation: When truthy, the client's auth is refreshed and
            the conversation is reset before the message is sent.

    Returns:
        The "message" field of the chatGPT response, or a fixed apology
        string when the request fails for any reason.
    """
    try:
        if reset_conversation:
            api.refresh_auth()
            api.reset_conversation()
        reply = api.send_message(text)
        return reply["message"]
    except Exception as err:
        # Best effort: the UI must always get a string back, but the cause is
        # logged instead of being silently discarded. `Exception` (rather than
        # a bare except) lets KeyboardInterrupt / SystemExit propagate.
        print(f"chatGPT request failed: {err!r}")
        return "Sorry, the chatGPT queue is full. Please try again later."
|
|
|
|
def chat(input_audio, chat_history, reset_conversation):
    """Run one voice-chat turn: speech -> text -> chatGPT reply -> speech.

    Args:
        input_audio: Path of the recorded audio file, or None when nothing
            was recorded.
        chat_history: JSON-encoded list of (message, response) pairs from the
            previous turns; empty string or None when there is no history.
        reset_conversation: When truthy, the previous history is dropped and
            the chatGPT conversation is reset before sending the message.

    Returns:
        A tuple ``(pairs, history_json, audio_path)``: the conversation as a
        list of (message, response) pairs, the same list JSON-encoded, and
        the path of the synthesized reply ("out.wav"). When no audio was
        supplied the existing history is returned unchanged and
        ``audio_path`` is None.
    """
    # Nothing recorded: do not feed None to the speech recognizer; just hand
    # back the conversation as it currently stands.
    if input_audio is None:
        previous = json.loads(chat_history) if chat_history else []
        return previous, chat_history or "", None

    # speech -> text
    message = pipe(input_audio)["text"]

    # text -> response
    response = get_response_from_chatbot(message, reset_conversation)

    # response -> speech; only the mel spectrogram is needed by the vocoder.
    mel_output, _mel_length, _alignment = tacotron2.encode_text(response)
    wav = hifi_gan.decode_batch(mel_output)
    # NOTE(review): every request writes the same "out.wav"; concurrent users
    # could overwrite each other's audio - confirm whether that matters here.
    # 22050 is presumably the sample rate of the LJSpeech models - TODO confirm.
    sf.write("out.wav", wav.squeeze().cpu().numpy(), 22050)

    # Start from an empty conversation on reset or when no history exists.
    if reset_conversation or not chat_history:
        out_chat = []
    else:
        out_chat = json.loads(chat_history)

    out_chat.append((message, response))
    chat_history = json.dumps(out_chat)

    return out_chat, chat_history, "out.wav"
|
|
|
|
# JavaScript executed in the browser when the start button is clicked.
# On its first run it hides #page_1, shows #page_2, sizes the chat area to the
# window height and starts a 500 ms poll that copies new message nodes from
# the hidden #chat_bot element into the visible #chat_bot1 element.
# All helpers and temporaries are declared locally (no implicit globals), and
# the poll is registered as a function reference rather than a string to eval.
start_work = """async() => {
    function isMobile() {
        try {
            document.createEvent("TouchEvent"); return true;
        } catch(e) {
            return false;
        }
    }
    function getClientHeight()
    {
        var clientHeight=0;
        if(document.body.clientHeight&&document.documentElement.clientHeight) {
            clientHeight = (document.body.clientHeight<document.documentElement.clientHeight)?document.body.clientHeight:document.documentElement.clientHeight;
        } else {
            clientHeight = (document.body.clientHeight>document.documentElement.clientHeight)?document.body.clientHeight:document.documentElement.clientHeight;
        }
        return clientHeight;
    }

    function setNativeValue(element, value) {
        const valueSetter = Object.getOwnPropertyDescriptor(element.__proto__, 'value').set;
        const prototype = Object.getPrototypeOf(element);
        const prototypeValueSetter = Object.getOwnPropertyDescriptor(prototype, 'value').set;

        if (valueSetter && valueSetter !== prototypeValueSetter) {
            prototypeValueSetter.call(element, value);
        } else {
            valueSetter.call(element, value);
        }
    }
    var gradioEl = document.querySelector('body > gradio-app').shadowRoot;
    if (!gradioEl) {
        gradioEl = document.querySelector('body > gradio-app');
    }

    if (typeof window['gradioEl'] === 'undefined') {
        window['gradioEl'] = gradioEl;

        const page1 = window['gradioEl'].querySelectorAll('#page_1')[0];
        const page2 = window['gradioEl'].querySelectorAll('#page_2')[0];

        page1.style.display = "none";
        page2.style.display = "block";
        window['div_count'] = 0;
        window['chat_bot'] = window['gradioEl'].querySelectorAll('#chat_bot')[0];
        window['chat_bot1'] = window['gradioEl'].querySelectorAll('#chat_bot1')[0];
        const chat_row = window['gradioEl'].querySelectorAll('#chat_row')[0];
        const prompt_row = window['gradioEl'].querySelectorAll('#prompt_row')[0];
        window['chat_bot1'].children[1].textContent = '';

        const clientHeight = getClientHeight();
        const new_height = (clientHeight-300) + 'px';
        chat_row.style.height = new_height;
        window['chat_bot'].style.height = new_height;
        window['chat_bot'].children[2].style.height = new_height;
        window['chat_bot1'].style.height = new_height;
        window['chat_bot1'].children[2].style.height = new_height;
        prompt_row.children[0].style.flex = 'auto';
        prompt_row.children[0].style.width = '100%';

        window['checkChange'] = function checkChange() {
            try {
                if (window['chat_bot'].children[2].children[0].children.length > window['div_count']) {
                    const new_len = window['chat_bot'].children[2].children[0].children.length - window['div_count'];
                    for (var i = 0; i < new_len; i++) {
                        const new_div = window['chat_bot'].children[2].children[0].children[window['div_count'] + i].cloneNode(true);
                        window['chat_bot1'].children[2].children[0].appendChild(new_div);
                    }
                    window['div_count'] = window['chat_bot'].children[2].children[0].children.length;
                }
                if (window['chat_bot'].children[0].children.length > 1) {
                    window['chat_bot1'].children[1].textContent = window['chat_bot'].children[0].children[1].textContent;
                } else {
                    window['chat_bot1'].children[1].textContent = '';
                }

            } catch(e) {
                // Best effort: ignore errors and try again on the next tick.
            }
        }
        window['checkChange_interval'] = window.setInterval(window['checkChange'], 500);
    }

    return false;
}"""
|
|
# Gradio UI. Components are created in display order, so statement order and
# nesting below define the page layout.
with gr.Blocks(title="Talk to chatGPT") as demo:
    # Static header: title, description of the three models, pipeline diagram,
    # "duplicate this space" badge and session-token instructions.
    gr.Markdown("## Talk to chatGPT ##")
    gr.HTML(
        "<p> Demo uses <a href='https://huggingface.co/openai/whisper-base.en' class='underline'>Whisper</a> to convert the input speech"
        " to transcribed text, <a href='https://chat.openai.com/chat' class='underline'>chatGPT</a> to generate responses, and <a"
        " href='https://huggingface.co/speechbrain/tts-tacotron2-ljspeech' class='underline'>tacotron2</a> to convert the response to"
        " output speech: </p>"
    )
    gr.HTML("<p> <center><img src='https://raw.githubusercontent.com/sanchit-gandhi/codesnippets/main/pipeline.png' width='870'></center> </p>")
    gr.HTML(
        "<p>You can duplicate this space and use your own session token: <a style='display:inline-block'"
        " href='https://huggingface.co/spaces/sanchit-gandhi/chatGPT?duplicate=true'><img"
        " src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=10'"
        " alt='Duplicate Space'></a></p>"
    )
    gr.HTML(
        "<p> Instructions on how to obtain your session token can be found in the video <a style='display:inline-block'"
        " href='https://youtu.be/TdNSj_qgdFk?t=175'><font style='color:blue;weight:bold;'>here</font></a>."
        " Add your session token by going to <i>Settings</i> -> <i>New secret</i> and add the token under the name <i>SessionToken</i>. </p>"
    )
    # Landing page: a single start button. Clicking it runs only the
    # `start_work` JavaScript (fn=None, so no Python callback is involved),
    # which hides #page_1 and reveals #page_2 in the browser.
    with gr.Group(elem_id="page_1", visible=True) as page_1:
        with gr.Box():
            with gr.Row():
                start_button = gr.Button("Let's talk to chatGPT! 🗣", elem_id="start-btn", visible=True)
                start_button.click(fn=None, inputs=[], outputs=[], _js=start_work)

    # Chat page: created hidden; `start_work` switches it to display:block.
    with gr.Group(elem_id="page_2", visible=False) as page_2:
        with gr.Row(elem_id="chat_row"):
            # `chatbot` (#chat_bot) is the hidden component that `chat` writes to;
            # `chatbot1` (#chat_bot1) is the visible one that the `start_work`
            # script fills by cloning new message nodes from #chat_bot.
            chatbot = gr.Chatbot(elem_id="chat_bot", visible=False).style(color_map=("green", "blue"))
            chatbot1 = gr.Chatbot(elem_id="chat_bot1").style(color_map=("green", "blue"))
        with gr.Row():
            # Microphone recording, handed to `chat` as a file path.
            prompt_input_audio = gr.Audio(
                source="microphone",
                type="filepath",
                label="Record Audio Input",
            )
            # Receives the synthesized reply returned by `chat`.
            prompt_output_audio = gr.Audio()

        reset_conversation = gr.Checkbox(label="Reset conversation?", value=False)
        with gr.Row(elem_id="prompt_row"):
            # Hidden textbox that carries the JSON-encoded chat history
            # between successive calls to `chat`.
            chat_history = gr.Textbox(lines=4, label="prompt", visible=False)
            submit_btn = gr.Button(value="Send to chatGPT", elem_id="submit-btn").style(
                margin=True,
                rounded=(True, True, True, True),
                width=100,
            )

        # One chat turn: the three inputs map to chat()'s parameters and the
        # three outputs to its returned (pairs, history_json, audio_path).
        submit_btn.click(
            fn=chat,
            inputs=[prompt_input_audio, chat_history, reset_conversation],
            outputs=[chatbot, chat_history, prompt_output_audio],
        )

# Start the Gradio server with debug mode enabled.
demo.launch(debug=True)
|
|