Spaces:
Runtime error
Runtime error
import gradio as gr

# English speech -> Spanish text + Spanish speech demo.
#
# Three hosted Hugging Face models are chained together:
#   1. ASR:         facebook/hubert-large-ls960-ft     (English audio -> English text)
#   2. Translation: Helsinki-NLP/opus-mt-en-es         (English text  -> Spanish text)
#   3. TTS:         facebook/tts_transformer-es-css10  (Spanish text  -> Spanish audio)
#
# NOTE(review): gr.Interface.load, gr.inputs / gr.outputs, gr.Series and
# gr.Parallel look like legacy Gradio APIs -- confirm they exist in the Gradio
# version this app is pinned to before deploying.

# ASR model for the input speech; the audio is uploaded and passed as a file path.
speech2text = gr.Interface.load(
    "huggingface/facebook/hubert-large-ls960-ft",
    inputs=gr.inputs.Audio(label="Upload Audio", type="filepath", source="upload"),
)

# Translates English text to Spanish text.
translator = gr.Interface.load(
    "huggingface/Helsinki-NLP/opus-mt-en-es",
    outputs=gr.outputs.Textbox(label="English to Spanish Translated Text"),
)

# TTS model for the Spanish output speech.
text2speech = gr.Interface.load(
    "huggingface/facebook/tts_transformer-es-css10",
    outputs=gr.outputs.Audio(label="English to Spanish Translated Audio"),
    allow_flagging="never",
)

# speech -> Spanish text
translate = gr.Series(speech2text, translator)
# speech -> Spanish text -> Spanish audio
en2es = gr.Series(translate, text2speech)
# Feed the same upload to both chains so the UI shows the translated text
# alongside the synthesized audio.
ui = gr.Parallel(translate, en2es)

# Interface metadata.
ui.title = "English to Spanish Speech Translator"
ui.description = """<center>A useful tool in translating English to Spanish audio. All pre-trained models are found in huggingface.</center>"""
ui.examples = [["ljspeech.wav"], ["ljspeech2.wav"], ["longspeech.wav"]]
ui.allow_flagging = "never"
ui.theme = "peach"
# Every href and every link text is kept on a single physical line so that no
# URL or model name is broken by an embedded newline.
ui.article = """<h2>Pre-trained model Information</h2>
<h3>Automatic Speech Recognition</h3>
<p style='text-align: justify'>The model used for the ASR part of this space is from
<a href="https://huggingface.co/facebook/hubert-large-ls960-ft">hubert-large-ls960-ft</a> which is pretrained and fine-tuned on <b>960 hours of
Librispeech</b> on 16kHz sampled speech audio. This model has a self-reported <b>word error rate (WER)</b> of <b>1.9
percent</b> and ranks first in <i>paperswithcode</i> for ASR on Librispeech. More information can be
found on its website at <a href="https://ai.facebook.com/blog/hubert-self-supervised-representation-learning-for-speech-recognition-generation-and-compression">hubert-self</a> and
original model is under <a href="https://github.com/pytorch/fairseq/tree/main/examples/hubert">pytorch/fairseq</a>.</p>
<h3>Text Translator</h3>
<p style='text-align: justify'>The English to Spanish text translator pre-trained model is from
<a href="https://huggingface.co/Helsinki-NLP/opus-mt-en-es">Helsinki-NLP/opus-mt-en-es</a> which is part of the <b>The
Tatoeba Translation Challenge
(v2021-08-07)</b> as seen from its github repo at
<a href="https://github.com/Helsinki-NLP/Tatoeba-Challenge">Helsinki-NLP/Tatoeba-Challenge</a>. This project aims to develop
machine
translation in real-world
cases for many languages. </p>
<h3>Text to Speech</h3>
<p style='text-align: justify'> The TTS model used is from <a href="https://huggingface.co/facebook/tts_transformer-es-css10">facebook/tts_transformer-es-css10</a>.
This model uses the <b>Fairseq(-py)</b> sequence modeling toolkit for speech synthesis, in this case, specifically TTS
for Spanish. More information can be seen on their git at
<a href="https://github.com/pytorch/fairseq/tree/main/examples/speech_synthesis">speech_synthesis</a>. </p>
"""

# Only start the web server when executed as a script, so importing this
# module (e.g. from tooling) does not launch the app as a side effect.
if __name__ == "__main__":
    ui.launch(inbrowser=True)