diff --git a/OpenTranslator/translator.py b/OpenTranslator/translator.py
index 7426ce4..a181626 100644
--- a/OpenTranslator/translator.py
+++ b/OpenTranslator/translator.py
@@ -10,6 +10,8 @@
 import os
 import unicodedata
 
+from transformers import pipeline
+
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 class CustomTranslator:
@@ -19,147 +21,218 @@ def __init__(self, output_dir="output"):
         self.translation_method = ""
         self.output_dir = output_dir
         os.makedirs(self.output_dir, exist_ok=True)
-        # Initialize other attributes as needed
 
-    def load_models(self):
+    def load_whisper_model(self):
         self.processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")
         self.model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3").to(device)
-        # self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
 
-    def process_audio_chunk(self, input_path, target_language, chunk_idx, output_path, translation_method):
-        try:
-            if translation_method == 'Local':
-                self.load_models()
-            start_time = time.time()
-            # Load input audio file using librosa
-            input_waveform, input_sampling_rate = librosa.load(input_path, sr=None, mono=True)
+    def unload_whisper_model(self):
+        del self.processor
+        del self.model
 
-            # Convert NumPy array to PyTorch tensor if needed
-            if not isinstance(input_waveform, torch.Tensor):
-                input_waveform = torch.tensor(input_waveform)
+    def load_mbart_model(self):
+        self.mbart_model = MBartForConditionalGeneration.from_pretrained("SnypzZz/Llama2-13b-Language-translate").to(device)
+        self.mbart_tokenizer = MBart50TokenizerFast.from_pretrained("SnypzZz/Llama2-13b-Language-translate", src_lang="en_XX", device=device)
 
-            forced_decoder_ids = self.processor.get_decoder_prompt_ids(language=target_language, task="translate")
+    def unload_mbart_model(self):
+        del self.mbart_model
+        del self.mbart_tokenizer
+
+    def load_tts_model(self):
+        self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
 
-            # Ensure the input audio has a proper frame rate
-            if input_sampling_rate != 16000:
-                resampler = torchaudio.transforms.Resample(orig_freq=input_sampling_rate, new_freq=16000)
-                input_waveform = resampler(input_waveform)
+    def unload_tts_model(self):
+        del self.tts
 
-            # Process the input audio with the processor
-            input_features = self.processor(input_waveform.numpy(), sampling_rate=16000, return_tensors="pt")
+    def process_audio_chunk(self, input_path, target_language, chunk_idx, output_path, translation_method, batch_size=4):
+        try:
+            start_time = time.time()
+
+            self.load_whisper_model()
+
+            # Load audio waveform
+            input_waveform, input_sampling_rate = librosa.load(input_path, sr=None, mono=True)
+
+            if not isinstance(input_waveform, torch.Tensor):
+                input_waveform = torch.tensor(input_waveform)
+
+            if input_sampling_rate != 16000:
+                resampler = torchaudio.transforms.Resample(orig_freq=input_sampling_rate, new_freq=16000)
+                input_waveform = resampler(input_waveform.clone().detach()).numpy()
+
+            # Prepare forced decoder IDs
+            forced_decoder_ids = self.processor.get_decoder_prompt_ids(language=target_language, task="translate")
+
+            # Create batches of input features
+            input_features = self.processor(
+                input_waveform,
+                sampling_rate=16000,
+                return_tensors="pt",
+                padding=True
+            )
+            input_features = {k: v.to(device) for k, v in input_features.items()}
+            input_batches = torch.split(input_features["input_features"], batch_size, dim=0)
+
+            # Process batches
+            transcriptions = []
+            for batch in input_batches:
+                with torch.no_grad():
+                    predicted_ids = self.model.generate(batch, forced_decoder_ids=forced_decoder_ids, max_length=448)
+                transcriptions.extend(self.processor.batch_decode(predicted_ids, skip_special_tokens=True))
+
+            # Combine transcriptions
+            transcription = " ".join(transcriptions)
+
+            del input_waveform, input_sampling_rate
+
+            end_time = time.time()
+            execution_time = (end_time - start_time) / 60
+            print(f"Transcription Execution time: {execution_time:.2f} minutes")
+
+            words = transcription.split()
+            cleaned_words = [words[0]]
+            for word in words[1:]:
+                if word != cleaned_words[-1]:
+                    cleaned_words.append(word)
+            cleaned_str = ' '.join(cleaned_words)
+
+            sentences = cleaned_str.split('.')
+            cleaned_sentences = [sentences[0]]
+            for sentence in sentences[1:]:
+                if sentence != cleaned_sentences[-1]:
+                    cleaned_sentences.append(sentence)
+            cleaned_transcription = '.'.join(cleaned_sentences)
+
+            transcription = cleaned_transcription
+            print('Speech recognition and translation to English text: ' + str(transcription))
+
+            Translation_chunk_output_path = os.path.join(self.output_dir, f"{os.path.splitext(os.path.basename(output_path))[0]}_Translation_chunk{chunk_idx + 1}.wav")
+
+            if target_language != 'en' and translation_method == 'Llama2-13b':
+                print("Local text translation started..")
+                start_time = time.time()
+                self.load_mbart_model()
-            # Move input features to the device used by the model
-            input_features = {k: v.to(device) for k, v in input_features.items()}
+                inputs = self.mbart_tokenizer(transcription, return_tensors="pt")
+                input_ids = inputs["input_ids"].to(device)
-            # Generate token ids
-            predicted_ids = self.model.generate(input_features["input_features"], forced_decoder_ids=forced_decoder_ids)
+                language_mapping = {
+                    "en": "en_XX", "es": "es_XX", "fr": "fr_XX", "de": "de_DE",
+                    "ja": "ja_XX", "ko": "ko_KR", "tr": "tr_TR", "ar": "ar_AR",
+                    "ru": "ru_RU", "he": "he_IL", "hi": "hi_IN", "it": "it_IT",
+                    "pt": "pt_XX", "zh": "zh_CN", "cs": "cs_CZ", "nl": "nl_XX", "pl": "pl_PL",
+                }
+                model_target_language = language_mapping.get(target_language, "en_XX")
-            # Decode token ids to text
-            transcription = self.processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+                # Generate tokens on the GPU
+                generated_tokens = self.mbart_model.generate(input_ids=input_ids, forced_bos_token_id=self.mbart_tokenizer.lang_code_to_id[model_target_language])
-            del input_waveform, input_sampling_rate
+                # Decode and join the translated text
+                translated_text = self.mbart_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
+                translated_text = ", ".join(translated_text)
+                self.unload_mbart_model()
+
+                print('Mbart Translation: '+ str(translated_text))
             end_time = time.time()
             execution_time = (end_time - start_time) / 60
             print(f"Transcription Execution time: {execution_time:.2f} minutes")
-            # Fix a bug: Text validation check if we have duplicate successive words
-            words = transcription.split()
-            cleaned_words = [words[0]]
-
-            for word in words[1:]:
-                if word != cleaned_words[-1]:
-                    cleaned_words.append(word)
-
-            cleaned_str = ' '.join(cleaned_words)
-
-            transcription = cleaned_str
-
-            # Fix duplicate successive sentences
-            sentences = transcription.split('.')
-            cleaned_sentences = [sentences[0]]
-
-            for sentence in sentences[1:]:
-                if sentence != cleaned_sentences[-1]:
-                    cleaned_sentences.append(sentence)
-
-            cleaned_transcription = '.'.join(cleaned_sentences)
-
-            transcription = cleaned_transcription
-            print('Speech recognition and translate to English text: ' + str(transcription))
-
-            Translation_chunk_output_path = os.path.join(self.output_dir, f"{os.path.splitext(os.path.basename(output_path))[0]}_Translation_chunk{chunk_idx + 1}.wav")
-
-            # If target language is English, skip text translation
-            if target_language != 'en':
-                # Local text translation
-                print("Local text translation started..")
-                start_time = time.time()
-                tt = MBartForConditionalGeneration.from_pretrained("SnypzZz/Llama2-13b-Language-translate").to(device)
-                tokenizer = MBart50TokenizerFast.from_pretrained("SnypzZz/Llama2-13b-Language-translate", src_lang="en_XX", device=device)
-
-                # Tokenize and convert to PyTorch tensor
-                inputs = tokenizer(transcription, return_tensors="pt")
-                input_ids = inputs["input_ids"].to(device)
-
-                # Map target languages to model language codes
-                language_mapping = {
-                    "en": "en_XX",
-                    "es": "es_XX",
-                    "fr": "fr_XX",
-                    "de": "de_DE",
-                    "ja": "ja_XX",
-                    "ko": "ko_KR",
-                    "tr": "tr_TR",
-                    "ar": "ar_AR",
-                    "ru": "ru_RU",
-                    "he": "he_IL",
-                    "hi": "hi_IN",
-                    "it": "it_IT",
-                    "pt": "pt_XX",
-                    "zh": "zh_CN",
-                    "cs": "cs_CZ",
-                    "nl": "nl_XX",
-                    "pl": "pl_PL",
-                }
-
-                # Set the target language based on the mapping
-                model_target_language = language_mapping.get(target_language, "en_XX")
-
-                # Generate tokens on the GPU
-                generated_tokens = tt.generate(input_ids=input_ids, forced_bos_token_id=tokenizer.lang_code_to_id[model_target_language])
-
-                # Decode and join the translated text
-                translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
-                translated_text = ", ".join(translated_text)
-
-                logging.info(f"Processing successful. Translated text: {translated_text}")
-                end_time = time.time()
-                execution_time = (end_time - start_time) / 60
-                print(f"Local Translation Execution time: {execution_time:.2f} minutes")
-
-            if target_language == 'en':
-                translated_text = transcription
-
-            # Generate final audio output from translated text
-            self.generate_audio(translated_text, Translation_chunk_output_path, target_language, input_path)
-
-            # Log success
-            logging.info(f"Translation successful for {input_path}. Translated text: {transcription}")
-            return translated_text
+            if target_language == 'en':
+                translated_text = transcription
+
+            if target_language != 'en' and translation_method == 'TowerInstruct-7B':
+                translated_text = self.validate_translation(transcription, target_language)
+
+            self.generate_audio(translated_text, Translation_chunk_output_path, target_language, input_path)
+
+            self.unload_whisper_model()
+            return translated_text
         except Exception as e:
-            # Log errors
             logging.error(f"Error processing audio: {e}")
-            raise # Re-raise the exception
+            return "An Error occurred!"
+
+    def validate_translation(self, source_text, target_language):
+        print('validate_translation started ..')
+        start_time = time.time()
+
+        languages = {
+            "English": "en",
+            "Spanish": "es",
+            "French": "fr",
+            "German": "de",
+            "Korean": "ko",
+            "Russian": "ru",
+            "Italian": "it",
+            "Portuguese": "pt",
+            "Chinese (Mandarin)": "zh",
+            "Dutch": "nl"
+        }
+
+        code_to_language = {code: lang for lang, code in languages.items()}
+        target_language = code_to_language.get(target_language, "Unknown language")
+
+        # supports 10 languages: English, German, French, Spanish, Chinese, Portuguese, Italian, Russian, Korean, and Dutch
+        pipe = pipeline("text-generation", model="Unbabel/TowerInstruct-7B-v0.2", torch_dtype=torch.bfloat16, device_map=device)
+        # We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
+        messages = [
+            {
+                "role": "user",
+                "content": (
+                    f"Translate the following text from English into {target_language}.\n"
+                    f"English: {source_text}\n"
+                    f"{target_language}:"
+                ),
+            }
+        ]
+
+        #print(target_language)
+        prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        outputs = pipe(prompt, max_new_tokens=256, do_sample=False)
+        generated_text = outputs[0]["generated_text"]
+
+        #translated_text = generated_text.split("English:")[-1].strip()
+
+        # Further sanitize to remove undesired formatting tokens
+        generated_text = (
+            generated_text.replace("<|im_start|>", "")
+            .replace("<|im_end|>", "")
+            .strip()
+        )
+
+        # Define the unwanted substrings in a list
+        unwanted_substrings = [
+            target_language,
+            source_text,
+            'assistant',
+            'Translate the following text from English into .',
+            '\n',
+            'English:',
+            ':'
+        ]
+
+        # Remove the unwanted substrings
+        translated_text = generated_text.split("\n", 1)[-1].strip()  # Split and strip the first line
+        for substring in unwanted_substrings:
+            translated_text = translated_text.replace(substring, '')
+
+        print(f'validate_translation: {translated_text}')
+        end_time = time.time()
+        execution_time = (end_time - start_time) / 60
+        print(f"validate_translation Execution time: {execution_time:.2f} minutes")
+        return translated_text
 
     def generate_audio(self, text, output_path, target_language, input_path):
         print("Generate audio")
-
-        # Text to speech to a file
         start_time = time.time()
-        self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+
+        self.load_tts_model()
+
         self.tts.tts_to_file(text=text, speaker_wav=input_path, language=target_language, file_path=output_path)
+
         end_time = time.time()
         execution_time = (end_time - start_time) / 60
         print(f"Generate_audio Execution time: {execution_time:.2f} minutes")
+
+        self.unload_tts_model()
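
For reference, a minimal standalone sketch of the Whisper "translate" path that the reworked `process_audio_chunk()` builds on. The model name, 16 kHz input and `forced_decoder_ids` usage mirror the patch; the file path and the target language are illustrative assumptions, not part of the repo.

```python
# Hypothetical standalone sketch of the Whisper translate step used above.
import librosa
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3").to(device)

# Whisper expects 16 kHz mono audio; librosa resamples on load.
waveform, _ = librosa.load("chunk1.wav", sr=16000, mono=True)  # example path
inputs = processor(waveform, sampling_rate=16000, return_tensors="pt")

# forced_decoder_ids steer decoding toward the "translate" task, as in the patch.
forced_ids = processor.get_decoder_prompt_ids(language="fr", task="translate")
with torch.no_grad():
    ids = model.generate(inputs.input_features.to(device), forced_decoder_ids=forced_ids, max_length=448)
print(processor.batch_decode(ids, skip_special_tokens=True)[0])
```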
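Similarly, a hedged sketch of the chat-template prompt flow that `validate_translation()` uses on the TowerInstruct-7B route. The model id and prompt shape come from the patch; the sample sentence and target language are made up, and the completion is recovered simply by stripping the echoed prompt rather than with the patch's substring clean-up.

```python
# Hypothetical sketch of the TowerInstruct translation prompt (model id from the patch).
import torch
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="Unbabel/TowerInstruct-7B-v0.2",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

messages = [{
    "role": "user",
    "content": "Translate the following text from English into German.\n"
               "English: Good morning, how are you?\nGerman:",
}]

# The tokenizer's chat template wraps the message in the model's expected format.
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
out = pipe(prompt, max_new_tokens=256, do_sample=False)

# The text-generation pipeline echoes the prompt, so keep only the completion.
print(out[0]["generated_text"][len(prompt):].strip())
```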
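And a small sketch of the XTTS voice-cloning call that `load_tts_model()` / `generate_audio()` wrap: the original input audio is passed as `speaker_wav` so the translated speech keeps the speaker's voice. The model name matches the patch; paths and text are placeholders.

```python
# Hypothetical XTTS voice-cloning call (Coqui TTS), mirroring generate_audio().
import torch
from TTS.api import TTS

device = "cuda" if torch.cuda.is_available() else "cpu"
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

tts.tts_to_file(
    text="Bonjour, ceci est un test.",        # translated text (example)
    speaker_wav="input/sample.mp3",            # original audio used as the voice reference
    language="fr",                             # target language code
    file_path="output/sample_Translation_chunk1.wav",
)
```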
diff --git a/Screenshot.png b/Screenshot.png
index 13fe742..cf157ba 100644
Binary files a/Screenshot.png and b/Screenshot.png differ
diff --git a/WebUI.py b/WebUI.py
index ca31a49..29712b8 100644
--- a/WebUI.py
+++ b/WebUI.py
@@ -8,6 +8,7 @@
 from OpenTranslator.translator import CustomTranslator
 import unicodedata
 import librosa
+from datetime import datetime
 
 current_dir = os.path.dirname(os.path.abspath(__file__))
 # Initialize the translator instance with an output directory
@@ -36,34 +37,144 @@
     "Polish": "pl"
 }
 
-language_choices = [(lang, code) for lang, code in languages.items()]
-
 # Define the translation options
-TextTranslationOption = ["Local"]
+TextTranslationOption = ["Llama2-13b", "TowerInstruct-7B"]
+
+# Function to toggle button state
+def toggle_button():
+    # Read the current toggle state from the gr.State value
+    if state.value:  # Current state is True
+        state.value = False  # Toggle to False
+        return "OFF"
+    else:  # Current state is False
+        state.value = True  # Toggle to True
+        return "ON"
+
+# Initial button state
+initial_state = False
+initial_label = "OFF"
 
 # Function to handle file uploads
 def upload_file(file):
     global audio_path
     audio_path = file.name
-    #return f"Selected File Title: {os.path.basename(audio_path)}"
+
+def enhance_audio(input_file, reference_file, output_file, bitrate="320k", volume_boost="10dB"):
+    """
+    Enhances the input audio and matches the timing of the reference audio file.
+    """
+    try:
+        # Verify that the input file and reference file exist
+        if not os.path.isfile(input_file):
+            raise FileNotFoundError(f"Input file not found: {input_file}")
+        if not os.path.isfile(reference_file):
+            raise FileNotFoundError(f"Reference file not found: {reference_file}")
+
+        # Extract the duration of the reference file (to match timing)
+        command_duration = [
+            "ffmpeg",
+            "-i", reference_file,
+            "-f", "null",
+            "-"
+        ]
+        result = subprocess.run(command_duration, stderr=subprocess.PIPE, text=True)
+        duration_line = [line for line in result.stderr.splitlines() if "Duration" in line]
+        if not duration_line:
+            raise Exception("Unable to extract duration from reference file")
+
+        duration_str = duration_line[0].split("Duration:")[1].split(",")[0].strip()
+        hours, minutes, seconds = map(float, duration_str.split(":"))
+        reference_duration = hours * 3600 + minutes * 60 + seconds  # duration in seconds
+
+        # Define filters for audio processing
+        noise_reduction_filter = "afftdn"  # Adaptive filter for noise reduction
+        normalization_filter = "loudnorm"  # EBU R128 normalization
+        dynamic_compression_filter = "acompressor"  # Dynamic range compression
+        equalizer_filter = "equalizer=f=1000:t=q:w=0.5:g=5"
+        volume_filter = f"volume={volume_boost}"
+        echo_cancellation_filter = "aecho=0.8:0.88:6:0.4"
+
+        # Combine the filters
+        audio_filters = (
+            f"{noise_reduction_filter},"
+            f"{normalization_filter},"
+            f"{dynamic_compression_filter},"
+            f"{echo_cancellation_filter},"
+            f"{equalizer_filter},"
+            f"{volume_filter}"
+        )
+
+        # Build the ffmpeg command to enhance the audio
+        command_enhance = [
+            "ffmpeg",
+            "-i", input_file,
+            "-af", audio_filters,
+            "-b:a", bitrate,  # High bitrate for best quality
+            "-async", "1",  # Ensure timing consistency
+            output_file
+        ]
+        print(f"Running command to enhance audio: {' '.join(command_enhance)}")
+
+        # Execute the command to enhance the audio
+        subprocess.run(command_enhance, check=True)
+
+        tempOutputFile = str(output_file)+'_tt.mp3'
+
+        # Now, adjust the duration of the enhanced audio to match the reference file
+        command_adjust_timing = [
+            "ffmpeg",
+            "-i", output_file,
+            "-t", str(reference_duration),  # Set duration to match reference
+            "-c", "copy",  # Copy the audio codec to avoid re-encoding
+            tempOutputFile
+        ]
+        print(f"Running command to adjust timing: {' '.join(command_adjust_timing)}")
+
+        # Execute the command to adjust the duration of the enhanced audio
+        subprocess.run(command_adjust_timing, check=True)
+
+        print(f"Enhanced audio saved to {output_file}, timing matched to reference file")
+
+        # Replace the original file with the enhanced version
+        os.remove(output_file)
+        os.rename(tempOutputFile, output_file)
+
+        print(f"Replaced original file with enhanced audio: {output_file}")
+
+    except subprocess.CalledProcessError as e:
+        print(f"Error during audio enhancement: {e}")
+    except Exception as e:
+        print(f"Unexpected error: {e}")
 
 # Function to run the translation process
 def run_translation(translation_method, target_lang):
-    output_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(audio_path))[0]}_translated.mp3")
+    valid_methods = ['Llama2-13b', 'TowerInstruct-7B']
+    if translation_method not in valid_methods:
+        raise ValueError(f"Invalid translation method: {translation_method}")
+    if translation_method == 'Llama2-13b':
+        target_lang = languages.get(target_lang)
+    if translation_method == 'TowerInstruct-7B':
+        target_lang = TowerInstruct_languages.get(target_lang)
+
+    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    output_path = os.path.normpath(os.path.join(output_dir, f"{os.path.splitext(os.path.basename(audio_path))[0]}_translated_To_{target_lang}_{translation_method}_{current_time}.mp3"))
+    if not output_path.startswith(output_dir):
+        raise Exception("Invalid output path")
 
     input_file = audio_path
     print(audio_path)
     input_duration = get_audio_duration(input_file)
+    max_chunk_duration = 30
+    num_chunks = int(input_duration / max_chunk_duration)
     print('input_duration: '+str(input_duration))
-    if input_duration > 30:
-        max_chunk_duration = 30
-        num_chunks = int(input_duration / max_chunk_duration)
+
+    if input_duration > 30:
+        print('Duration more than 30 sec - num_chunks: '+str(num_chunks))
         chunk_files = []
         Translation_chunk_files = []
         translated_text = []
         for chunk_idx in range(num_chunks):
-            print('duration more then 30- num_chunks: '+str(num_chunks))
-            print('duration more then 30- chunk_idx'+str(chunk_idx))
+            print('Current chunk_idx: '+str(chunk_idx))
             start_time = chunk_idx * max_chunk_duration
             end_time = min((chunk_idx + 1) * max_chunk_duration, input_duration)
             chunk_output_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(output_path))[0]}_chunk{chunk_idx + 1}.wav")
@@ -87,11 +198,20 @@ def run_translation(translation_method, target_lang):
         final_output_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(output_path))[0]}-temp.wav")
-        if translation_method == 'Local':
-            merge_audio_files(Translation_chunk_files, final_output_path)
+        merge_audio_files(Translation_chunk_files, final_output_path)
-        subprocess.run(['ffmpeg', '-i', final_output_path, '-codec:a', 'libmp3lame', output_path], check=True)
-        os.remove(final_output_path)
+        if state.value:
+            print('Improve_Audio_Quality started ..')
+            tmp_output_file = str(output_path)+'_tmp.mp3'
+            # convert the merged audio to mp3 first
+            subprocess.run(['ffmpeg', '-i', final_output_path, '-codec:a', 'libmp3lame', tmp_output_file], check=True)
+            reference_file = input_file
+            enhance_audio(tmp_output_file, reference_file, output_path)
+            os.remove(final_output_path)
+            os.remove(tmp_output_file)
+        else:
+            subprocess.run(['ffmpeg', '-i', final_output_path, '-codec:a', 'libmp3lame', output_path], check=True)
+            os.remove(final_output_path)
 
         delete_chunk_files(chunk_files)
         delete_chunk_files(Translation_chunk_files)
@@ -101,11 +221,10 @@ def run_translation(translation_method, target_lang):
         translation_result = ', '.join(translated_text)
         return translation_result, output_path
 
-    if input_duration <= 30 and translation_method == 'Local':
-        #translated_text = []
+    if input_duration <= 30 and num_chunks <= 1:
         chunk_output_path = input_file
-        chunk_idx = 0
-        print('duration less then 30')
+
+        print('Duration less than or equal to 30 sec')
         try:
             translation_result = translator_instance.process_audio_chunk(chunk_output_path,
                                                                          target_lang,
@@ -114,12 +233,23 @@ def run_translation(translation_method, target_lang):
             print(f"{e}")
             return "An Error occurred!"
 
-        #translated_text.append(translated_text)
-        Translation_chunk_output_path = os.path.join(output_dir, f"{os.path.splitext(os.path.basename(output_path))[0]}_Translation_chunk1.wav")
+        Translation_chunk_output_path = os.path.normpath(os.path.join(output_dir, f"{os.path.splitext(os.path.basename(output_path))[0]}_Translation_chunk1.wav"))
+        if not Translation_chunk_output_path.startswith(output_dir):
+            raise Exception("Invalid translation chunk output path")
+
+        # optional audio enhancement and timing match
+        if state.value:
+            tmp_output_file = str(output_path)+'_tmp.mp3'
+            subprocess.run(['ffmpeg', '-i', Translation_chunk_output_path, '-codec:a', 'libmp3lame', tmp_output_file], check=True)
+            reference_file = input_file
+            enhance_audio(tmp_output_file, reference_file, output_path)
+            os.remove(Translation_chunk_output_path)
+            os.remove(tmp_output_file)
+
+        else:
+            subprocess.run(['ffmpeg', '-i', Translation_chunk_output_path, '-codec:a', 'libmp3lame', output_path], check=True)
+            os.remove(Translation_chunk_output_path)
-        subprocess.run(['ffmpeg', '-i', Translation_chunk_output_path, '-codec:a', 'libmp3lame', output_path], check=True)
-        os.remove(Translation_chunk_output_path)
-
         return translation_result, output_path
 
 # Function to split audio into a chunk using ffmpeg
@@ -159,31 +289,73 @@ def delete_chunk_files(files):
 
 def upload_audio(audio_file):
     return audio_file
 
+TowerInstruct_languages = {
+    "English": "en",
+    "Spanish": "es",
+    "French": "fr",
+    "German": "de",
+    "Korean": "ko",
+    "Russian": "ru",
+    "Italian": "it",
+    "Portuguese": "pt",
+    "Chinese (Mandarin)": "zh",
+    "Dutch": "nl"
+}
+
+model_languages = {
+    "Llama2-13b": list(languages.keys()),
+    "TowerInstruct-7B": list(TowerInstruct_languages.keys())
+}
+
+def update_languages(selected_model):
+    supported_languages = model_languages[selected_model]
+    return gr.update(choices=supported_languages, value=supported_languages[0])
+
 # Define the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Open Translator")
+    demo.clear()
+    gr.Markdown("# Open Translator WebUI")
 
     with gr.Row():
         with gr.Column():
-            #gr.Markdown("## Select Translation Method:")
-            translation_method = gr.Dropdown(choices=TextTranslationOption, value=TextTranslationOption[0], label="Translation Method")
+            translation_method = gr.Dropdown(choices=TextTranslationOption, value=TextTranslationOption[0], label="Translation Method")
 
             gr.Markdown("## Select Audio File:")
-            audio_file = gr.File(type="filepath", label="Upload Audio File")
+            audio_file = gr.File(type="filepath", label="Select the Audio File")
             audio_player = gr.Audio(label="Audio Player", interactive=True)
-            #file_title = gr.Textbox(label="Selected File Title")
             audio_file.upload(upload_file, audio_file)
             audio_file.change(upload_audio, audio_file, audio_player)
 
-            gr.Markdown("## Select Target Language:")
-            target_lang = gr.Dropdown(choices=language_choices, value="ar", label="Target Language")
-            #print(target_lang)
-            translate_button = gr.Button("translate")
+            gr.Markdown("## Optimize Output Audio File Quality:")
+            state = gr.State(value=initial_state)  # Internal state to track the toggle
+            button = gr.Button(initial_label)
+
+            # Set up button click behavior
+            button.click(
+                toggle_button,
+                outputs=[button]
+            )
+
+            gr.Markdown("## Select Language:")
+            target_lang = gr.Dropdown(
+                choices=model_languages["Llama2-13b"],
+                value=model_languages["Llama2-13b"][0],
+                label="Translate To"
+            )
+
+            translation_method.change(
+                update_languages,
+                inputs=translation_method,
+                outputs=target_lang
+            )
+
+            translate_button = gr.Button("Start Translation")
 
         with gr.Column():
-            translated_text = gr.Textbox(label="Translated text", lines=20, interactive=False)
-            audio_output = gr.Audio(label="Translated Audio")
+            translated_text = gr.Textbox(label="Translated Text Result", lines=20, interactive=False)
+            audio_output = gr.Audio(label="Translated Audio Result")
 
     translate_button.click(run_translation, inputs=[translation_method, target_lang], outputs=[translated_text, audio_output])
 
-demo.launch(server_name="127.0.0.2", server_port=7861)
\ No newline at end of file
+
+demo.launch(server_name="127.0.0.1", server_port=7861)
\ No newline at end of file
diff --git a/readme.md b/readme.md
index 9b87caf..1cd99ed 100644
--- a/readme.md
+++ b/readme.md
@@ -107,7 +107,7 @@ python WebUI.py
 - [cs-test.mp3](./testResults/cs-test.mp3)
 - [de-test.mp3](./testResults/de-test.mp3)
 - [en-test.mp3](./testResults/en-test.mp3)
-- [es-Original-Audio.mp3](./testResults/es-Original-Audio.mp3)
+- [es-test.mp3](./testResults/es-test.mp3)
 - [fr-test.mp3](./testResults/fr-test.mp3)
 - [hi-test.mp3](./testResults/hi-test.mp3)
 - [hu-test.mp3](./testResults/hu-test.mp3)
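
Finally, a self-contained sketch of the dropdown-update pattern the WebUI relies on (`translation_method.change(update_languages, ...)`): switching the method swaps the language choices of the target-language dropdown. The labels and the trimmed language lists here are illustrative, not the repo's full mappings.

```python
# Minimal sketch (assumed, not from the repo) of the Gradio dropdown-update pattern.
import gradio as gr

model_languages = {
    "Llama2-13b": ["Arabic", "English", "Spanish"],
    "TowerInstruct-7B": ["English", "Spanish", "French"],
}

def update_languages(selected_model):
    # Return an update that replaces the choices and resets the selected value.
    langs = model_languages[selected_model]
    return gr.update(choices=langs, value=langs[0])

with gr.Blocks() as demo:
    method = gr.Dropdown(choices=list(model_languages), value="Llama2-13b", label="Translation Method")
    target = gr.Dropdown(choices=model_languages["Llama2-13b"], value="Arabic", label="Translate To")
    # Whenever the method changes, rebuild the target-language dropdown.
    method.change(update_languages, inputs=method, outputs=target)

demo.launch()
```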