diff --git a/ansage.py b/ansage.py
index 3a5a52d..8b56bb0 100644
--- a/ansage.py
+++ b/ansage.py
@@ -5,29 +5,36 @@ from TTS.api import TTS
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # List available 🐸TTS models
-print(TTS().list_models())
+#print(TTS().list_models())
 
 # Initialize TTS
 tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
 
 # List speakers
-print(tts.speakers)
-# Run TTS
-# ❗ XTTS supports both, but many models allow only one of the `speaker` and
-# `speaker_wav` arguments
-
-# TTS with list of amplitude values as output, clone the voice from `speaker_wav`
-#wav = tts.tts(
-#    text="Hello world!",
-#    speaker_wav="my/cloning/audio.wav",
-#    language="en"
-#)
+# Configure output path
+output_path = "output.wav"
 
 # TTS to a file, use a preset speaker
 tts.tts_to_file(
-    text="Hallo Oliver, herzlichen Glückwunsch zur erfolgreichen Entlastung",
-    speaker_wav="./0248.wav",
-    language="en",
-    file_path="output2.wav"
+    text="Verehrte Fahrgäste, das heutige Besäufnis wird Ihnen gesponsert von, Rheinmetall",
+    speaker_wav=["./0248.wav"],
+    language="de",
+    file_path=output_path
 )
+
+# Process Audio (stolen from my good friend flon)
+
+from pydub import AudioSegment
+from pydub.utils import make_chunks
+
+audio = AudioSegment.from_wav(output_path)
+audio = audio.set_channels(1)
+audio = audio.set_sample_width(2)
+audio = audio.set_frame_rate(16000)
+
+silence = AudioSegment.silent(duration=1000)
+gong = AudioSegment.from_wav("./513_1.wav")
+
+audio = silence + gong + audio
+audio.export(output_path, format="wav")
 
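
Note (not part of the diff): the pydub steps above are intended to leave output.wav as mono, 16-bit, 16 kHz audio, but pydub re-syncs segment properties when segments are concatenated, so if ./513_1.wav has a different channel count or sample rate the exported file can end up with that format instead. Below is a minimal sanity-check sketch using only the standard-library wave module; it assumes the script has already written output.wav to the working directory.

# Sanity check (a sketch, not part of ansage.py): verify the exported file
# really is mono, 16-bit, 16 kHz as intended by the set_* calls in the diff.
import wave

f = wave.open("output.wav", "rb")           # path assumed from the script above
print("channels:    ", f.getnchannels())    # expected 1 (mono)
print("sample width:", f.getsampwidth())    # expected 2 bytes (16-bit PCM)
print("frame rate:  ", f.getframerate())    # expected 16000 Hz
print("length:      ", f.getnframes() / f.getframerate(), "seconds")
f.close()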